nnetsauce

  1from .base.base import Base
  2from .base.baseRegressor import BaseRegressor
  3from .boosting.adaBoostClassifier import AdaBoostClassifier
  4from .custom.customClassifier import CustomClassifier
  5from .custom.customRegressor import CustomRegressor
  6from .custom.customBackpropRegressor import CustomBackPropRegressor
  7from .datasets import Downloader
  8from .deep.deepClassifier import DeepClassifier
  9from .deep.deepRegressor import DeepRegressor
 10from .deep.deepMTS import DeepMTS
 11from .elasticnet2.enet2 import ElasticNet2Regressor
 12from .glm.glmClassifier import GLMClassifier
 13from .glm.glmRegressor import GLMRegressor
 14from .kernel.kernel import KernelRidge
 15from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
 16from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
 17from .lazypredict.lazydeepClassifier import LazyDeepClassifier
 18from .lazypredict.lazydeepRegressor import LazyDeepRegressor
 19from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
 20from .mts.mts import MTS
 21from .mts.mlarch import MLARCH
 22from .mts.classical import ClassicalMTS
 23from .mts.stackedmts import MTSStacker
 24from .mts.multioutputmts import MultiOutputMTS
 25from .mts.discretetokenmts import DiscreteTokenMTS
 26from .multitask.multitaskClassifier import MultitaskClassifier
 27from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
 28from .neuralnet.neuralnetregression import NeuralNetRegressor
 29from .neuralnet.neuralnetclassification import NeuralNetClassifier
 30from .optimizers.optimizer import Optimizer
 31from .predictioninterval import PredictionInterval
 32from .predictionset import PredictionSet
 33from .quantile.quantileregression import QuantileRegressor
 34from .quantile.quantileclassification import QuantileClassifier
 35from .randombag.randomBagClassifier import RandomBagClassifier
 36from .randombag.randomBagRegressor import RandomBagRegressor
 37from .randomfourier.randomfourier import RandomFourierEstimator
 38from .rff.rffridge import (
 39    RandomFourierFeaturesRidge,
 40    RandomFourierFeaturesRidgeGCV,
 41)
 42from .ridge.ridge import RidgeRegressor
 43from .ridge2.ridge2Classifier import Ridge2Classifier
 44from .ridge2.ridge2Regressor import Ridge2Regressor
 45from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
 46from .ridge2.ridge2MTSJAX import Ridge2Forecaster
 47from .ridge2.ridge2multioutputregressor import Ridge2MultiOutputRegressor
 48from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
 49from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
 50from .sampling import SubSampler
 51from .updater import RegressorUpdater, ClassifierUpdater
 52from .votingregressor import MedianVotingRegressor
 53
 54__all__ = [
 55    "AdaBoostClassifier",
 56    "Base",
 57    "BaseRegressor",
 58    "BayesianRVFLRegressor",
 59    "BayesianRVFL2Regressor",
 60    "ClassicalMTS",
 61    "CustomClassifier",
 62    "CustomRegressor",
 63    "CustomBackPropRegressor",
 64    "DeepClassifier",
 65    "DeepRegressor",
 66    "DeepMTS",
 67    "DiscreteTokenMTS",
 68    "Downloader",
 69    "ElasticNet2Regressor",
 70    "GLMClassifier",
 71    "GLMRegressor",
 72    "KernelRidge",
 73    "LazyClassifier",
 74    "LazyRegressor",
 75    "LazyDeepClassifier",
 76    "LazyDeepRegressor",
 77    "LazyMTS",
 78    "LazyDeepMTS",
 79    "MLARCH",
 80    "MedianVotingRegressor",
 81    "MTS",
 82    "MTSStacker",
 83    "MultiOutputMTS",
 84    "MultitaskClassifier",
 85    "NeuralNetRegressor",
 86    "NeuralNetClassifier",
 87    "PredictionInterval",
 88    "PredictionSet",
 89    "SimpleMultitaskClassifier",
 90    "Optimizer",
 91    "QuantileRegressor",
 92    "QuantileClassifier",
 93    "RandomBagRegressor",
 94    "RandomBagClassifier",
 95    "RandomFourierEstimator",
 96    "RandomFourierFeaturesRidge",
 97    "RandomFourierFeaturesRidgeGCV",
 98    "RegressorUpdater",
 99    "ClassifierUpdater",
100    "RidgeRegressor",
101    "Ridge2Regressor",
102    "Ridge2MultiOutputRegressor",
103    "Ridge2Classifier",
104    "Ridge2MultitaskClassifier",
105    "Ridge2Forecaster",
106    "SubSampler",
107]
class AdaBoostClassifier(nnetsauce.boosting.bst.Boosting, sklearn.base.ClassifierMixin):
 21class AdaBoostClassifier(Boosting, ClassifierMixin):
 22    """AdaBoost Classification (SAMME) model class derived from class Boosting
 23
 24    Parameters:
 25
 26        obj: object
 27            any object containing a method fit (obj.fit()) and a method predict
 28            (obj.predict())
 29
 30        n_estimators: int
 31            number of boosting iterations
 32
 33        learning_rate: float
 34            learning rate of the boosting procedure
 35
 36        n_hidden_features: int
 37            number of nodes in the hidden layer
 38
 39        reg_lambda: float
 40            regularization parameter for weights
 41
 42        reg_alpha: float
 43            controls compromize between l1 and l2 norm of weights
 44
 45        activation_name: str
 46            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 47
 48        a: float
 49            hyperparameter for 'prelu' or 'elu' activation function
 50
 51        nodes_sim: str
 52            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 53            'uniform'
 54
 55        bias: boolean
 56            indicates if the hidden layer contains a bias term (True) or not
 57            (False)
 58
 59        dropout: float
 60            regularization parameter; (random) percentage of nodes dropped out
 61            of the training
 62
 63        direct_link: boolean
 64            indicates if the original predictors are included (True) in model's
 65            fitting or not (False)
 66
 67        n_clusters: int
 68            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 69                no clustering)
 70
 71        cluster_encode: bool
 72            defines how the variable containing clusters is treated (default is one-hot)
 73            if `False`, then labels are used, without one-hot encoding
 74
 75        type_clust: str
 76            type of clustering method: currently k-means ('kmeans') or Gaussian
 77            Mixture Model ('gmm')
 78
 79        type_scaling: a tuple of 3 strings
 80            scaling methods for inputs, hidden layer, and clustering respectively
 81            (and when relevant).
 82            Currently available: standardization ('std') or MinMax scaling ('minmax')
 83
 84        col_sample: float
 85            percentage of covariates randomly chosen for training
 86
 87        row_sample: float
 88            percentage of rows chosen for training, by stratified bootstrapping
 89
 90        seed: int
 91            reproducibility seed for nodes_sim=='uniform'
 92
 93        verbose: int
 94            0 for no output, 1 for a progress bar (default is 1)
 95
 96        method: str
 97            type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)
 98
 99        backend: str
100            "cpu" or "gpu" or "tpu"
101
102    Attributes:
103
104        alpha_: list
105            AdaBoost coefficients alpha_m
106
107        base_learners_: dict
108            a dictionary containing the base learners
109
110    Examples:
111
112    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)
113
114    ```python
115    import nnetsauce as ns
116    import numpy as np
117    from sklearn.datasets import load_breast_cancer
118    from sklearn.linear_model import LogisticRegression
119    from sklearn.model_selection import train_test_split
120    from sklearn import metrics
121    from time import time
122
123    breast_cancer = load_breast_cancer()
124    Z = breast_cancer.data
125    t = breast_cancer.target
126    np.random.seed(123)
127    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
128
129    # SAMME.R
130    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
131                            random_state=123)
132    fit_obj = ns.AdaBoostClassifier(clf,
133                                    n_hidden_features=int(11.22338867),
134                                    direct_link=True,
135                                    n_estimators=250, learning_rate=0.01126343,
136                                    col_sample=0.72684326, row_sample=0.86429443,
137                                    dropout=0.63078613, n_clusters=2,
138                                    type_clust="gmm",
139                                    verbose=1, seed = 123,
140                                    method="SAMME.R")
141
142    start = time()
143    fit_obj.fit(X_train, y_train)
144    print(f"Elapsed {time() - start}")
145
146    start = time()
147    print(fit_obj.score(X_test, y_test))
148    print(f"Elapsed {time() - start}")
149
150    preds = fit_obj.predict(X_test)
151
152    print(metrics.classification_report(preds, y_test))
153
154    ```
155
156    """
157
158    # construct the object -----
159    _estimator_type = "classifier"
160
161    def __init__(
162        self,
163        obj,
164        n_estimators=10,
165        learning_rate=0.1,
166        n_hidden_features=1,
167        reg_lambda=0,
168        reg_alpha=0.5,
169        activation_name="relu",
170        a=0.01,
171        nodes_sim="sobol",
172        bias=True,
173        dropout=0,
174        direct_link=False,
175        n_clusters=2,
176        cluster_encode=True,
177        type_clust="kmeans",
178        type_scaling=("std", "std", "std"),
179        col_sample=1,
180        row_sample=1,
181        seed=123,
182        verbose=1,
183        method="SAMME",
184        backend="cpu",
185    ):
186        self.type_fit = "classification"
187        self.verbose = verbose
188        self.method = method
189        self.reg_lambda = reg_lambda
190        self.reg_alpha = reg_alpha
191
192        super().__init__(
193            obj=obj,
194            n_estimators=n_estimators,
195            learning_rate=learning_rate,
196            n_hidden_features=n_hidden_features,
197            activation_name=activation_name,
198            a=a,
199            nodes_sim=nodes_sim,
200            bias=bias,
201            dropout=dropout,
202            direct_link=direct_link,
203            n_clusters=n_clusters,
204            cluster_encode=cluster_encode,
205            type_clust=type_clust,
206            type_scaling=type_scaling,
207            col_sample=col_sample,
208            row_sample=row_sample,
209            seed=seed,
210            backend=backend,
211        )
212
213        self.alpha_ = []
214        self.base_learners_ = dict.fromkeys(range(n_estimators))
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit Boosting model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            **kwargs: additional parameters to be passed to
229                    self.cook_training_set or self.obj.fit
230
231        Returns:
232
233             self: object
234        """
235
236        assert mx.is_factor(y), "y must contain only integers"
237
238        assert self.method in (
239            "SAMME",
240            "SAMME.R",
241        ), "`method` must be either 'SAMME' or 'SAMME.R'"
242
243        assert (self.reg_lambda <= 1) & (
244            self.reg_lambda >= 0
245        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"
246
247        assert (self.reg_alpha <= 1) & (
248            self.reg_alpha >= 0
249        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"
250
251        # training
252        n, p = X.shape
253        self.n_classes = len(np.unique(y))
254        self.classes_ = np.unique(y)  # for compatibility with sklearn
255        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
256
257        if sample_weight is None:
258            w_m = np.repeat(1.0 / n, n)
259        else:
260            w_m = np.asarray(sample_weight)
261
262        base_learner = CustomClassifier(
263            self.obj,
264            n_hidden_features=self.n_hidden_features,
265            activation_name=self.activation_name,
266            a=self.a,
267            nodes_sim=self.nodes_sim,
268            bias=self.bias,
269            dropout=self.dropout,
270            direct_link=self.direct_link,
271            n_clusters=self.n_clusters,
272            type_clust=self.type_clust,
273            type_scaling=self.type_scaling,
274            col_sample=self.col_sample,
275            row_sample=self.row_sample,
276            seed=self.seed,
277        )
278
279        if self.verbose == 1:
280            pbar = Progbar(self.n_estimators)
281
282        if self.method == "SAMME":
283            err_m = 1e6
284            err_bound = 1 - 1 / self.n_classes
285            self.alpha_.append(1.0)
286            x_range_n = range(n)
287
288            for m in range(self.n_estimators):
289                preds = base_learner.fit(
290                    X, y, sample_weight=w_m.ravel(), **kwargs
291                ).predict(X)
292
293                self.base_learners_.update({m: deepcopy(base_learner)})
294
295                cond = [y[i] != preds[i] for i in x_range_n]
296
297                err_m = max(
298                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
299                    2.220446049250313e-16,
300                )  # sum(w_m) == 1
301
302                if self.reg_lambda > 0:
303                    err_m += self.reg_lambda * (
304                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
305                        + self.reg_alpha * sum([abs(x) for x in w_m])
306                    )
307
308                err_m = min(err_m, err_bound)
309
310                alpha_m = self.learning_rate * log(
311                    (self.n_classes - 1) * (1 - err_m) / err_m
312                )
313
314                self.alpha_.append(alpha_m)
315
316                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]
317
318                sum_w_m = sum(w_m_temp)
319
320                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])
321
322                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
323
324                if self.verbose == 1:
325                    pbar.update(m)
326
327            if self.verbose == 1:
328                pbar.update(self.n_estimators)
329
330            self.n_estimators = len(self.base_learners_)
331            self.classes_ = np.unique(y)
332
333            return self
334
335        if self.method == "SAMME.R":
336            Y = mo.one_hot_encode2(y, self.n_classes)
337
338            if sample_weight is None:
339                w_m = np.repeat(1.0 / n, n)  # (N, 1)
340
341            else:
342                w_m = np.asarray(sample_weight)
343
344            for m in range(self.n_estimators):
345                probs = base_learner.fit(
346                    X, y, sample_weight=w_m.ravel(), **kwargs
347                ).predict_proba(X)
348
349                np.clip(
350                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
351                )
352
353                self.base_learners_.update({m: deepcopy(base_learner)})
354
355                w_m *= np.exp(
356                    -1.0
357                    * self.learning_rate
358                    * (1.0 - 1.0 / self.n_classes)
359                    * xlogy(Y, probs).sum(axis=1)
360                )
361
362                w_m /= np.sum(w_m)
363
364                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
365
366                if self.verbose == 1:
367                    pbar.update(m)
368
369            if self.verbose == 1:
370                pbar.update(self.n_estimators)
371
372            self.n_estimators = len(self.base_learners_)
373            self.classes_ = np.unique(y)
374
375            return self
376
377    def predict(self, X, **kwargs):
378        """Predict test data X.
379
380        Parameters:
381
382            X: {array-like}, shape = [n_samples, n_features]
383                Training vectors, where n_samples is the number
384                of samples and n_features is the number of features.
385
386            **kwargs: additional parameters to be passed to
387                  self.cook_test_set
388
389        Returns:
390
391            model predictions: {array-like}
392        """
393        return self.predict_proba(X, **kwargs).argmax(axis=1)
394
395    def predict_proba(self, X, **kwargs):
396        """Predict probabilities for test data X.
397
398        Parameters:
399
400            X: {array-like}, shape = [n_samples, n_features]
401                Training vectors, where n_samples is the number
402                of samples and n_features is the number of features.
403
404            **kwargs: additional parameters to be passed to
405                  self.cook_test_set
406
407        Returns:
408
409            probability estimates for test data: {array-like}
410
411        """
412
413        n_iter = len(self.base_learners_)
414
415        if self.method == "SAMME":
416            ensemble_learner = np.zeros((X.shape[0], self.n_classes))
417
418            # if self.verbose == 1:
419            #    pbar = Progbar(n_iter)
420
421            for idx, base_learner in self.base_learners_.items():
422                preds = base_learner.predict(X, **kwargs)
423
424                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
425                    preds, self.n_classes
426                )
427
428                # if self.verbose == 1:
429                #    pbar.update(idx)
430
431            # if self.verbose == 1:
432            #    pbar.update(n_iter)
433
434            expit_ensemble_learner = expit(ensemble_learner)
435
436            sum_ensemble = expit_ensemble_learner.sum(axis=1)
437
438            return expit_ensemble_learner / sum_ensemble[:, None]
439
440        # if self.method == "SAMME.R":
441        ensemble_learner = 0
442
443        # if self.verbose == 1:
444        #    pbar = Progbar(n_iter)
445
446        for idx, base_learner in self.base_learners_.items():
447            probs = base_learner.predict_proba(X, **kwargs)
448
449            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
450
451            log_preds_proba = np.log(probs)
452
453            ensemble_learner += (
454                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
455            )
456
457            # if self.verbose == 1:
458            #    pbar.update(idx)
459
460        ensemble_learner *= self.n_classes - 1
461
462        # if self.verbose == 1:
463        #    pbar.update(n_iter)
464
465        expit_ensemble_learner = expit(ensemble_learner)
466
467        sum_ensemble = expit_ensemble_learner.sum(axis=1)
468
469        return expit_ensemble_learner / sum_ensemble[:, None]
470
471    @property
472    def _estimator_type(self):
473        return "classifier"

AdaBoost Classification (SAMME) model class derived from class Boosting

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of boosting iterations

learning_rate: float
    learning rate of the boosting procedure

n_hidden_features: int
    number of nodes in the hidden layer

reg_lambda: float
    regularization parameter for weights

reg_alpha: float
    controls compromise between l1 and l2 norm of weights

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

verbose: int
    0 for no output, 1 for a progress bar (default is 1)

method: str
    type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

alpha_: list
    AdaBoost coefficients alpha_m

base_learners_: dict
    a dictionary containing the base learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                        random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
                                n_hidden_features=int(11.22338867),
                                direct_link=True,
                                n_estimators=250, learning_rate=0.01126343,
                                col_sample=0.72684326, row_sample=0.86429443,
                                dropout=0.63078613, n_clusters=2,
                                type_clust="gmm",
                                verbose=1, seed = 123,
                                method="SAMME.R")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

preds = fit_obj.predict(X_test)

print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit Boosting model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            **kwargs: additional parameters to be passed to
229                    self.cook_training_set or self.obj.fit
230
231        Returns:
232
233             self: object
234        """
235
236        assert mx.is_factor(y), "y must contain only integers"
237
238        assert self.method in (
239            "SAMME",
240            "SAMME.R",
241        ), "`method` must be either 'SAMME' or 'SAMME.R'"
242
243        assert (self.reg_lambda <= 1) & (
244            self.reg_lambda >= 0
245        ), "must have self.reg_lambda <= 1 &  self.reg_lambda >= 0"
246
247        assert (self.reg_alpha <= 1) & (
248            self.reg_alpha >= 0
249        ), "must have self.reg_alpha <= 1 &  self.reg_alpha >= 0"
250
251        # training
252        n, p = X.shape
253        self.n_classes = len(np.unique(y))
254        self.classes_ = np.unique(y)  # for compatibility with sklearn
255        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
256
257        if sample_weight is None:
258            w_m = np.repeat(1.0 / n, n)
259        else:
260            w_m = np.asarray(sample_weight)
261
262        base_learner = CustomClassifier(
263            self.obj,
264            n_hidden_features=self.n_hidden_features,
265            activation_name=self.activation_name,
266            a=self.a,
267            nodes_sim=self.nodes_sim,
268            bias=self.bias,
269            dropout=self.dropout,
270            direct_link=self.direct_link,
271            n_clusters=self.n_clusters,
272            type_clust=self.type_clust,
273            type_scaling=self.type_scaling,
274            col_sample=self.col_sample,
275            row_sample=self.row_sample,
276            seed=self.seed,
277        )
278
279        if self.verbose == 1:
280            pbar = Progbar(self.n_estimators)
281
282        if self.method == "SAMME":
283            err_m = 1e6
284            err_bound = 1 - 1 / self.n_classes
285            self.alpha_.append(1.0)
286            x_range_n = range(n)
287
288            for m in range(self.n_estimators):
289                preds = base_learner.fit(
290                    X, y, sample_weight=w_m.ravel(), **kwargs
291                ).predict(X)
292
293                self.base_learners_.update({m: deepcopy(base_learner)})
294
295                cond = [y[i] != preds[i] for i in x_range_n]
296
297                err_m = max(
298                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
299                    2.220446049250313e-16,
300                )  # sum(w_m) == 1
301
302                if self.reg_lambda > 0:
303                    err_m += self.reg_lambda * (
304                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
305                        + self.reg_alpha * sum([abs(x) for x in w_m])
306                    )
307
308                err_m = min(err_m, err_bound)
309
310                alpha_m = self.learning_rate * log(
311                    (self.n_classes - 1) * (1 - err_m) / err_m
312                )
313
314                self.alpha_.append(alpha_m)
315
316                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]
317
318                sum_w_m = sum(w_m_temp)
319
320                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])
321
322                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
323
324                if self.verbose == 1:
325                    pbar.update(m)
326
327            if self.verbose == 1:
328                pbar.update(self.n_estimators)
329
330            self.n_estimators = len(self.base_learners_)
331            self.classes_ = np.unique(y)
332
333            return self
334
335        if self.method == "SAMME.R":
336            Y = mo.one_hot_encode2(y, self.n_classes)
337
338            if sample_weight is None:
339                w_m = np.repeat(1.0 / n, n)  # (N, 1)
340
341            else:
342                w_m = np.asarray(sample_weight)
343
344            for m in range(self.n_estimators):
345                probs = base_learner.fit(
346                    X, y, sample_weight=w_m.ravel(), **kwargs
347                ).predict_proba(X)
348
349                np.clip(
350                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
351                )
352
353                self.base_learners_.update({m: deepcopy(base_learner)})
354
355                w_m *= np.exp(
356                    -1.0
357                    * self.learning_rate
358                    * (1.0 - 1.0 / self.n_classes)
359                    * xlogy(Y, probs).sum(axis=1)
360                )
361
362                w_m /= np.sum(w_m)
363
364                base_learner.set_params(seed=self.seed + (m + 1) * 1000)
365
366                if self.verbose == 1:
367                    pbar.update(m)
368
369            if self.verbose == 1:
370                pbar.update(self.n_estimators)
371
372            self.n_estimators = len(self.base_learners_)
373            self.classes_ = np.unique(y)
374
375            return self

Fit Boosting model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

 self: object
def predict(self, X, **kwargs):
377    def predict(self, X, **kwargs):
378        """Predict test data X.
379
380        Parameters:
381
382            X: {array-like}, shape = [n_samples, n_features]
383                Training vectors, where n_samples is the number
384                of samples and n_features is the number of features.
385
386            **kwargs: additional parameters to be passed to
387                  self.cook_test_set
388
389        Returns:
390
391            model predictions: {array-like}
392        """
393        return self.predict_proba(X, **kwargs).argmax(axis=1)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
      self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
395    def predict_proba(self, X, **kwargs):
396        """Predict probabilities for test data X.
397
398        Parameters:
399
400            X: {array-like}, shape = [n_samples, n_features]
401                Training vectors, where n_samples is the number
402                of samples and n_features is the number of features.
403
404            **kwargs: additional parameters to be passed to
405                  self.cook_test_set
406
407        Returns:
408
409            probability estimates for test data: {array-like}
410
411        """
412
413        n_iter = len(self.base_learners_)
414
415        if self.method == "SAMME":
416            ensemble_learner = np.zeros((X.shape[0], self.n_classes))
417
418            # if self.verbose == 1:
419            #    pbar = Progbar(n_iter)
420
421            for idx, base_learner in self.base_learners_.items():
422                preds = base_learner.predict(X, **kwargs)
423
424                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
425                    preds, self.n_classes
426                )
427
428                # if self.verbose == 1:
429                #    pbar.update(idx)
430
431            # if self.verbose == 1:
432            #    pbar.update(n_iter)
433
434            expit_ensemble_learner = expit(ensemble_learner)
435
436            sum_ensemble = expit_ensemble_learner.sum(axis=1)
437
438            return expit_ensemble_learner / sum_ensemble[:, None]
439
440        # if self.method == "SAMME.R":
441        ensemble_learner = 0
442
443        # if self.verbose == 1:
444        #    pbar = Progbar(n_iter)
445
446        for idx, base_learner in self.base_learners_.items():
447            probs = base_learner.predict_proba(X, **kwargs)
448
449            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)
450
451            log_preds_proba = np.log(probs)
452
453            ensemble_learner += (
454                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
455            )
456
457            # if self.verbose == 1:
458            #    pbar.update(idx)
459
460        ensemble_learner *= self.n_classes - 1
461
462        # if self.verbose == 1:
463        #    pbar.update(n_iter)
464
465        expit_ensemble_learner = expit(ensemble_learner)
466
467        sum_ensemble = expit_ensemble_learner.sum(axis=1)
468
469        return expit_ensemble_learner / sum_ensemble[:, None]

Predict probabilities for test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
      self.cook_test_set

Returns:

probability estimates for test data: {array-like}
class Base(sklearn.base.BaseEstimator):
  48class Base(BaseEstimator):
  49    """Base model from which all the other classes inherit.
  50
  51    This class contains the most important data preprocessing/feature engineering methods.
  52
  53    Parameters:
  54
  55        n_hidden_features: int
  56            number of nodes in the hidden layer
  57
  58        activation_name: str
  59            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
  60
  61        a: float
  62            hyperparameter for 'prelu' or 'elu' activation function
  63
  64        nodes_sim: str
  65            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
  66            'uniform'
  67
  68        bias: boolean
  69            indicates if the hidden layer contains a bias term (True) or
  70            not (False)
  71
  72        dropout: float
  73            regularization parameter; (random) percentage of nodes dropped out
  74            of the training
  75
  76        direct_link: boolean
  77            indicates if the original features are included (True) in model's
  78            fitting or not (False)
  79
  80        n_clusters: int
  81            number of clusters for type_clust='kmeans' or type_clust='gmm'
  82            clustering (could be 0: no clustering)
  83
  84        cluster_encode: bool
  85            defines how the variable containing clusters is treated (default is one-hot);
  86            if `False`, then labels are used, without one-hot encoding
  87
  88        type_clust: str
  89            type of clustering method: currently k-means ('kmeans') or Gaussian
  90            Mixture Model ('gmm')
  91
  92        type_scaling: a tuple of 3 strings
  93            scaling methods for inputs, hidden layer, and clustering respectively
  94            (and when relevant).
  95            Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or  max absolute scaling ('maxabs')
  96
  97        col_sample: float
  98            percentage of features randomly chosen for training
  99
 100        row_sample: float
 101            percentage of rows chosen for training, by stratified bootstrapping
 102
 103        seed: int
 104            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 105
 106        backend: str
 107            "cpu" or "gpu" or "tpu"
 108
 109    """
 110
 111    # construct the object -----
 112
 113    def __init__(
 114        self,
 115        n_hidden_features=5,
 116        activation_name="relu",
 117        a=0.01,
 118        nodes_sim="sobol",
 119        bias=True,
 120        dropout=0,
 121        direct_link=True,
 122        n_clusters=2,
 123        cluster_encode=True,
 124        type_clust="kmeans",
 125        type_scaling=("std", "std", "std"),
 126        col_sample=1,
 127        row_sample=1,
 128        seed=123,
 129        backend="cpu",
 130    ):
 131        if not JAX_AVAILABLE and backend != "cpu":
 132            raise RuntimeError(
 133                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
 134            )
 135
 136        # input checks -----
 137
 138        sys_platform = platform.system()
 139
 140        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
 141            warnings.warn(
 142                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
 143            )
 144            backend = "cpu"
 145
 146        assert activation_name in (
 147            "relu",
 148            "tanh",
 149            "sigmoid",
 150            "prelu",
 151            "elu",
 152        ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')"
 153
 154        assert nodes_sim in (
 155            "sobol",
 156            "hammersley",
 157            "uniform",
 158            "halton",
 159        ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')"
 160
 161        assert type_clust in (
 162            "kmeans",
 163            "gmm",
 164        ), "'type_clust' must be in ('kmeans', 'gmm')"
 165
 166        assert (len(type_scaling) == 3) & all(
 167            type_scaling[i] in ("minmax", "std", "robust", "maxabs")
 168            for i in range(len(type_scaling))
 169        ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')"
 170
 171        assert (col_sample >= 0) & (
 172            col_sample <= 1
 173        ), "'col_sample' must be comprised between 0 and 1 (both included)"
 174
 175        assert backend in (
 176            "cpu",
 177            "gpu",
 178            "tpu",
 179        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"
 180
 181        self.n_hidden_features = n_hidden_features
 182        self.activation_name = activation_name
 183        self.a = a
 184        self.nodes_sim = nodes_sim
 185        self.bias = bias
 186        self.seed = seed
 187        self.backend = backend
 188        self.dropout = dropout
 189        self.direct_link = direct_link
 190        self.cluster_encode = cluster_encode
 191        self.type_clust = type_clust
 192        self.type_scaling = type_scaling
 193        self.col_sample = col_sample
 194        self.row_sample = row_sample
 195        self.n_clusters = n_clusters
 196        if isinstance(self, RegressorMixin):
 197            self.type_fit = "regression"
 198        elif isinstance(self, ClassifierMixin):
 199            self.type_fit = "classification"
 200        self.subsampler_ = None
 201        self.index_col_ = None
 202        self.index_row_ = True
 203        self.clustering_obj_ = None
 204        self.clustering_scaler_ = None
 205        self.nn_scaler_ = None
 206        self.scaler_ = None
 207        self.encoder_ = None
 208        self.W_ = None
 209        self.X_ = None
 210        self.y_ = None
 211        self.y_mean_ = None
 212        self.beta_ = None
 213
 214        # activation function -----
 215
 216        activation_options = {
 217            "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
 218            "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
 219            "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid),
 220            "prelu": partial(ac.prelu, a=a),
 221            "elu": (
 222                partial(ac.elu, a=a)
 223                if (self.backend == "cpu")
 224                else partial(jnn.elu, a=a)
 225            ),
 226        }
 227
 228        self.activation_func = activation_options[activation_name]
 229
 230    # "preprocessing" methods to be inherited -----
 231
 232    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
 233        """Create new covariates with kmeans or GMM clustering
 234
 235        Parameters:
 236
 237            X: {array-like}, shape = [n_samples, n_features]
 238                Training vectors, where n_samples is the number
 239                of samples and n_features is the number of features.
 240
 241            predict: boolean
 242                is False on training set and True on test set
 243
 244            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
 245                if scaler has already been fitted on training data (online training), it can be passed here
 246
 247            **kwargs:
 248                additional parameters to be passed to the
 249                clustering method
 250
 251        Returns:
 252
 253            Clusters' matrix, one-hot encoded: {array-like}
 254
 255        """
 256
 257        np.random.seed(self.seed)
 258
 259        if X is None:
 260            X = self.X_
 261
 262        if isinstance(X, pd.DataFrame):
 263            X = copy.deepcopy(X.values.astype(float))
 264
 265        if len(X.shape) == 1:
 266            X = X.reshape(1, -1)
 267
 268        if predict is False:  # encode training set
 269            # scale input data before clustering
 270            self.clustering_scaler_, scaled_X = mo.scale_covariates(
 271                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
 272            )
 273
 274            self.clustering_obj_, X_clustered = mo.cluster_covariates(
 275                scaled_X,
 276                self.n_clusters,
 277                self.seed,
 278                type_clust=self.type_clust,
 279                **kwargs
 280            )
 281
 282            if self.cluster_encode:
 283                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
 284                    np.float16
 285                )
 286
 287            return X_clustered.astype(np.float16)
 288
 289        # if predict == True, encode test set
 290        X_clustered = self.clustering_obj_.predict(
 291            self.clustering_scaler_.transform(X)
 292        )
 293
 294        if self.cluster_encode == True:
 295            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
 296                np.float16
 297            )
 298
 299        return X_clustered.astype(np.float16)
 300
 301    def create_layer(self, scaled_X, W=None):
 302        """Create hidden layer.
 303
 304        Parameters:
 305
 306            scaled_X: {array-like}, shape = [n_samples, n_features]
 307                Training vectors, where n_samples is the number
 308                of samples and n_features is the number of features
 309
 310            W: {array-like}, shape = [n_features, hidden_features]
 311                if provided, constructs the hidden layer with W; otherwise computed internally
 312
 313        Returns:
 314
 315            Hidden layer matrix: {array-like}
 316
 317        """
 318
 319        n_features = scaled_X.shape[1]
 320
 321        # hash_sim = {
 322        #         "sobol": generate_sobol,
 323        #         "hammersley": generate_hammersley,
 324        #         "uniform": generate_uniform,
 325        #         "halton": generate_halton
 326        #     }
 327
 328        if self.bias is False:  # no bias term in the hidden layer
 329            if W is None:
 330                if self.nodes_sim == "sobol":
 331                    self.W_ = generate_sobol(
 332                        n_dims=n_features,
 333                        n_points=self.n_hidden_features,
 334                        seed=self.seed,
 335                    )
 336                elif self.nodes_sim == "hammersley":
 337                    self.W_ = generate_hammersley(
 338                        n_dims=n_features,
 339                        n_points=self.n_hidden_features,
 340                        seed=self.seed,
 341                    )
 342                elif self.nodes_sim == "uniform":
 343                    self.W_ = generate_uniform(
 344                        n_dims=n_features,
 345                        n_points=self.n_hidden_features,
 346                        seed=self.seed,
 347                    )
 348                else:
 349                    self.W_ = generate_halton(
 350                        n_dims=n_features,
 351                        n_points=self.n_hidden_features,
 352                        seed=self.seed,
 353                    )
 354
 355                assert (
 356                    scaled_X.shape[1] == self.W_.shape[0]
 357                ), "check dimensions of covariates X and matrix W"
 358
 359                return mo.dropout(
 360                    x=self.activation_func(
 361                        mo.safe_sparse_dot(
 362                            a=scaled_X, b=self.W_, backend=self.backend
 363                        )
 364                    ),
 365                    drop_prob=self.dropout,
 366                    seed=self.seed,
 367                )
 368
 369            # W is not none
 370            assert (
 371                scaled_X.shape[1] == W.shape[0]
 372            ), "check dimensions of covariates X and matrix W"
 373
 374            # self.W_ = W
 375            return mo.dropout(
 376                x=self.activation_func(
 377                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
 378                ),
 379                drop_prob=self.dropout,
 380                seed=self.seed,
 381            )
 382
 383        # with bias term in the hidden layer
 384        if W is None:
 385            n_features_1 = n_features + 1
 386
 387            if self.nodes_sim == "sobol":
 388                self.W_ = generate_sobol(
 389                    n_dims=n_features_1,
 390                    n_points=self.n_hidden_features,
 391                    seed=self.seed,
 392                )
 393            elif self.nodes_sim == "hammersley":
 394                self.W_ = generate_hammersley(
 395                    n_dims=n_features_1,
 396                    n_points=self.n_hidden_features,
 397                    seed=self.seed,
 398                )
 399            elif self.nodes_sim == "uniform":
 400                self.W_ = generate_uniform(
 401                    n_dims=n_features_1,
 402                    n_points=self.n_hidden_features,
 403                    seed=self.seed,
 404                )
 405            else:
 406                self.W_ = generate_halton(
 407                    n_dims=n_features_1,
 408                    n_points=self.n_hidden_features,
 409                    seed=self.seed,
 410                )
 411
 412            # self.W_ = hash_sim[self.nodes_sim](
 413            #         n_dims=n_features_1,
 414            #         n_points=self.n_hidden_features,
 415            #         seed=self.seed,
 416            #     )
 417
 418            return mo.dropout(
 419                x=self.activation_func(
 420                    mo.safe_sparse_dot(
 421                        a=mo.cbind(
 422                            np.ones(scaled_X.shape[0]),
 423                            scaled_X,
 424                            backend=self.backend,
 425                        ),
 426                        b=self.W_,
 427                        backend=self.backend,
 428                    )
 429                ),
 430                drop_prob=self.dropout,
 431                seed=self.seed,
 432            )
 433
 434        # W is not None
 435        # self.W_ = W
 436        return mo.dropout(
 437            x=self.activation_func(
 438                mo.safe_sparse_dot(
 439                    a=mo.cbind(
 440                        np.ones(scaled_X.shape[0]),
 441                        scaled_X,
 442                        backend=self.backend,
 443                    ),
 444                    b=W,
 445                    backend=self.backend,
 446                )
 447            ),
 448            drop_prob=self.dropout,
 449            seed=self.seed,
 450        )
 451
 452    def _jax_create_layer(self, scaled_X, W=None):
 453        """JAX-compatible version of create_layer that exactly matches the original functionality."""
 454        key = jax.random.PRNGKey(self.seed)
 455        n_features = scaled_X.shape[1]
 456
 457        # Generate weights if not provided
 458        if W is None:
 459            if self.bias:
 460                n_features_1 = n_features + 1
 461                shape = (n_features_1, self.n_hidden_features)
 462            else:
 463                shape = (n_features, self.n_hidden_features)
 464
 465            # JAX-compatible weight generation matching original behavior
 466            if self.nodes_sim == "sobol":
 467                W_np = generate_sobol(
 468                    n_dims=n_features_1,
 469                    n_points=self.n_hidden_features,
 470                    seed=self.seed,
 471                )
 472                W = jnp.asarray(W_np)
 473            elif self.nodes_sim == "hammersley":
 474                W_np = generate_hammersley(
 475                    n_dims=n_features_1,
 476                    n_points=self.n_hidden_features,
 477                    seed=self.seed,
 478                )
 479                W = jnp.asarray(W_np)
 480            elif self.nodes_sim == "uniform":
 481                key, subkey = jax.random.split(key)
 482                W = jax.random.uniform(
 483                    subkey, shape=shape, minval=-1.0, maxval=1.0
 484                )
 485            else:  # halton
 486                W_np = generate_halton(
 487                    n_dims=n_features_1,
 488                    n_points=self.n_hidden_features,
 489                    seed=self.seed,
 490                )
 491                W = jnp.asarray(W_np)
 492
 493            self.W_ = np.array(W)  # Store as numpy for original methods
 494
 495        # Prepare input with bias if needed
 496        if self.bias:
 497            X_with_bias = jnp.hstack(
 498                [jnp.ones((scaled_X.shape[0], 1)), scaled_X]
 499            )
 500            print("X_with_bias shape:", X_with_bias.shape)
 501            print("W shape:", W.shape)
 502            linear_output = jnp.dot(X_with_bias, W)
 503        else:
 504            linear_output = jnp.dot(scaled_X, W)
 505
 506        # Apply activation function
 507        if self.activation_name == "relu":
 508            activated = jax.nn.relu(linear_output)
 509        elif self.activation_name == "tanh":
 510            activated = jnp.tanh(linear_output)
 511        elif self.activation_name == "sigmoid":
 512            activated = jax.nn.sigmoid(linear_output)
 513        else:  # leaky relu
 514            activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 515
 516        # Apply dropout
 517        if self.dropout > 0:
 518            key, subkey = jax.random.split(key)
 519            mask = jax.random.bernoulli(
 520                subkey, p=1 - self.dropout, shape=activated.shape
 521            )
 522            activated = jnp.where(mask, activated / (1 - self.dropout), 0)
 523
 524        return activated
 525
 526    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
 527        """Create new hidden features for training set, with hidden layer, center the response.
 528
 529        Parameters:
 530
 531            y: array-like, shape = [n_samples]
 532                Target values
 533
 534            X: {array-like}, shape = [n_samples, n_features]
 535                Training vectors, where n_samples is the number
 536                of samples and n_features is the number of features
 537
 538            W: {array-like}, shape = [n_features, hidden_features]
 539                if provided, constructs the hidden layer via W
 540
 541        Returns:
 542
 543            (centered response, direct link + hidden layer matrix): {tuple}
 544
 545        """
 546
 547        # either X and y are stored or not
 548        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
 549        if self.n_hidden_features > 0:  # has a hidden layer
 550            assert (
 551                len(self.type_scaling) >= 2
 552            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
 553
 554        if X is None:
 555            if self.col_sample == 1:
 556                input_X = self.X_
 557            else:
 558                n_features = self.X_.shape[1]
 559                new_n_features = int(np.ceil(n_features * self.col_sample))
 560                assert (
 561                    new_n_features >= 1
 562                ), "check class attribute 'col_sample' and the number of covariates provided for X"
 563                np.random.seed(self.seed)
 564                index_col = np.random.choice(
 565                    range(n_features), size=new_n_features, replace=False
 566                )
 567                self.index_col_ = index_col
 568                input_X = self.X_[:, self.index_col_]
 569
 570        else:  # X is not None # keep X vs self.X_
 571            if isinstance(X, pd.DataFrame):
 572                X = copy.deepcopy(X.values.astype(float))
 573
 574            if self.col_sample == 1:
 575                input_X = X
 576            else:
 577                n_features = X.shape[1]
 578                new_n_features = int(np.ceil(n_features * self.col_sample))
 579                assert (
 580                    new_n_features >= 1
 581                ), "check class attribute 'col_sample' and the number of covariates provided for X"
 582                np.random.seed(self.seed)
 583                index_col = np.random.choice(
 584                    range(n_features), size=new_n_features, replace=False
 585                )
 586                self.index_col_ = index_col
 587                input_X = X[:, self.index_col_]
 588
 589        if self.n_clusters <= 0:
 590            # data without any clustering: self.n_clusters is None -----
 591
 592            if self.n_hidden_features > 0:  # with hidden layer
 593                self.nn_scaler_, scaled_X = mo.scale_covariates(
 594                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
 595                )
 596                Phi_X = (
 597                    self.create_layer(scaled_X)
 598                    if W is None
 599                    else self.create_layer(scaled_X, W=W)
 600                )
 601                Z = (
 602                    mo.cbind(input_X, Phi_X, backend=self.backend)
 603                    if self.direct_link is True
 604                    else Phi_X
 605                )
 606                self.scaler_, scaled_Z = mo.scale_covariates(
 607                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 608                )
 609            else:  # no hidden layer
 610                Z = input_X
 611                self.scaler_, scaled_Z = mo.scale_covariates(
 612                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 613                )
 614
 615        else:
 616            # data with clustering: self.n_clusters is not None ----- # keep
 617
 618            augmented_X = mo.cbind(
 619                input_X,
 620                self.encode_clusters(input_X, **kwargs),
 621                backend=self.backend,
 622            )
 623
 624            if self.n_hidden_features > 0:  # with hidden layer
 625                self.nn_scaler_, scaled_X = mo.scale_covariates(
 626                    augmented_X,
 627                    choice=self.type_scaling[1],
 628                    scaler=self.nn_scaler_,
 629                )
 630                Phi_X = (
 631                    self.create_layer(scaled_X)
 632                    if W is None
 633                    else self.create_layer(scaled_X, W=W)
 634                )
 635                Z = (
 636                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
 637                    if self.direct_link is True
 638                    else Phi_X
 639                )
 640                self.scaler_, scaled_Z = mo.scale_covariates(
 641                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 642                )
 643            else:  # no hidden layer
 644                Z = augmented_X
 645                self.scaler_, scaled_Z = mo.scale_covariates(
 646                    Z, choice=self.type_scaling[0], scaler=self.scaler_
 647                )
 648
 649        # Returning model inputs -----
 650        if mx.is_factor(y) is False:  # regression
 651            # center y
 652            if y is None:
 653                self.y_mean_, centered_y = mo.center_response(self.y_)
 654            else:
 655                self.y_mean_, centered_y = mo.center_response(y)
 656
 657            # y is subsampled
 658            if self.row_sample < 1:
 659                n, p = Z.shape
 660
 661                self.subsampler_ = (
 662                    SubSampler(
 663                        y=self.y_, row_sample=self.row_sample, seed=self.seed
 664                    )
 665                    if y is None
 666                    else SubSampler(
 667                        y=y, row_sample=self.row_sample, seed=self.seed
 668                    )
 669                )
 670
 671                self.index_row_ = self.subsampler_.subsample()
 672
 673                n_row_sample = len(self.index_row_)
 674                # regression
 675                return (
 676                    centered_y[self.index_row_].reshape(n_row_sample),
 677                    self.scaler_.transform(
 678                        Z[self.index_row_, :].reshape(n_row_sample, p)
 679                    ),
 680                )
 681            # y is not subsampled
 682            # regression
 683            return (centered_y, self.scaler_.transform(Z))
 684
 685        # classification
 686        # y is subsampled
 687        if self.row_sample < 1:
 688            n, p = Z.shape
 689
 690            self.subsampler_ = (
 691                SubSampler(
 692                    y=self.y_, row_sample=self.row_sample, seed=self.seed
 693                )
 694                if y is None
 695                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
 696            )
 697
 698            self.index_row_ = self.subsampler_.subsample()
 699
 700            n_row_sample = len(self.index_row_)
 701            # classification
 702            return (
 703                y[self.index_row_].reshape(n_row_sample),
 704                self.scaler_.transform(
 705                    Z[self.index_row_, :].reshape(n_row_sample, p)
 706                ),
 707            )
 708        # y is not subsampled
 709        # classification
 710        return (y, self.scaler_.transform(Z))
 711
 712    def cook_test_set(self, X, **kwargs):
 713        """Transform data from test set, with hidden layer.
 714
 715        Parameters:
 716
 717            X: {array-like}, shape = [n_samples, n_features]
 718                Training vectors, where n_samples is the number
 719                of samples and n_features is the number of features
 720
 721            **kwargs: additional parameters to be passed to self.encode_cluster
 722
 723        Returns:
 724
 725            Transformed test set : {array-like}
 726        """
 727
 728        if isinstance(X, pd.DataFrame):
 729            X = copy.deepcopy(X.values.astype(float))
 730
 731        if len(X.shape) == 1:
 732            X = X.reshape(1, -1)
 733
 734        if (
 735            self.n_clusters == 0
 736        ):  # data without clustering: self.n_clusters is None -----
 737            if self.n_hidden_features > 0:
 738                # if hidden layer
 739                scaled_X = (
 740                    self.nn_scaler_.transform(X)
 741                    if (self.col_sample == 1)
 742                    else self.nn_scaler_.transform(X[:, self.index_col_])
 743                )
 744                Phi_X = self.create_layer(scaled_X, self.W_)
 745                if self.direct_link:
 746                    return self.scaler_.transform(
 747                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
 748                    )
 749                # when self.direct_link == False
 750                return self.scaler_.transform(Phi_X)
 751            # if no hidden layer # self.n_hidden_features == 0
 752            return self.scaler_.transform(X)
 753
 754        # data with clustering: self.n_clusters > 0 -----
 755        if self.col_sample == 1:
 756            predicted_clusters = self.encode_clusters(
 757                X=X, predict=True, **kwargs
 758            )
 759            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
 760        else:
 761            predicted_clusters = self.encode_clusters(
 762                X=X[:, self.index_col_], predict=True, **kwargs
 763            )
 764            augmented_X = mo.cbind(
 765                X[:, self.index_col_], predicted_clusters, backend=self.backend
 766            )
 767
 768        if self.n_hidden_features > 0:  # if hidden layer
 769            scaled_X = self.nn_scaler_.transform(augmented_X)
 770            Phi_X = self.create_layer(scaled_X, self.W_)
 771            if self.direct_link:
 772                return self.scaler_.transform(
 773                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
 774                )
 775            return self.scaler_.transform(Phi_X)
 776
 777        # if no hidden layer
 778        return self.scaler_.transform(augmented_X)
 779
 780    def cook_training_set_jax(self, y=None, X=None, W=None, **kwargs):
 781        """JAX-compatible version of cook_training_set that maintains side effects."""
 782        # Initialize random key
 783        key = jax.random.PRNGKey(self.seed)
 784
 785        # Convert inputs to JAX arrays
 786        X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_)
 787        y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_)
 788
 789        # Handle column sampling
 790        if self.col_sample < 1:
 791            n_features = X.shape[1]
 792            new_n_features = int(jnp.ceil(n_features * self.col_sample))
 793            assert new_n_features >= 1, "Invalid col_sample"
 794
 795            key, subkey = jax.random.split(key)
 796            index_col = jax.random.choice(
 797                subkey, n_features, shape=(new_n_features,), replace=False
 798            )
 799            self.index_col_ = np.array(
 800                index_col
 801            )  # Store as numpy for original methods
 802            input_X = X[:, index_col]
 803            n_features = (
 804                new_n_features  # Update n_features after column sampling
 805            )
 806        else:
 807            input_X = X
 808            n_features = X.shape[1]
 809
 810        augmented_X = input_X
 811
 812        # JAX-compatible scaling
 813        def jax_scale(data, mean=None, std=None):
 814            if mean is None:
 815                mean = jnp.mean(data, axis=0)
 816            if std is None:
 817                std = jnp.std(data, axis=0)
 818            return (data - mean) / (std + 1e-10), mean, std
 819
 820        # Hidden layer processing
 821        if self.n_hidden_features > 0:
 822            # Initialize weights if not provided
 823            if W is None:
 824                shape = (n_features, self.n_hidden_features)
 825
 826                # JAX-compatible weight generation
 827                if self.nodes_sim == "uniform":
 828                    key, subkey = jax.random.split(key)
 829                    W = jax.random.uniform(
 830                        subkey, shape=shape, minval=-1.0, maxval=1.0
 831                    ) * (1 / jnp.sqrt(n_features))
 832                else:
 833                    # For other sequences, use numpy generation then convert to JAX
 834                    if self.nodes_sim == "sobol":
 835                        W_np = generate_sobol(
 836                            n_dims=shape[0],
 837                            n_points=shape[1],
 838                            seed=self.seed,
 839                        )
 840                    elif self.nodes_sim == "hammersley":
 841                        W_np = generate_hammersley(
 842                            n_dims=shape[0],
 843                            n_points=shape[1],
 844                            seed=self.seed,
 845                        )
 846                    elif self.nodes_sim == "halton":
 847                        W_np = generate_halton(
 848                            n_dims=shape[0],
 849                            n_points=shape[1],
 850                            seed=self.seed,
 851                        )
 852                    else:  # default to uniform
 853                        key, subkey = jax.random.split(key)
 854                        W = jax.random.uniform(
 855                            subkey, shape=shape, minval=-1.0, maxval=1.0
 856                        ) * (1 / jnp.sqrt(n_features))
 857
 858                    if self.nodes_sim in ["sobol", "hammersley", "halton"]:
 859                        W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features))
 860
 861                self.W_ = np.array(W)  # Store as numpy for original methods
 862
 863            # Scale features
 864            scaled_X, self.nn_mean_, self.nn_std_ = jax_scale(
 865                augmented_X,
 866                getattr(self, "nn_mean_", None),
 867                getattr(self, "nn_std_", None),
 868            )
 869
 870            # Create hidden layer with proper bias handling
 871            linear_output = jnp.dot(scaled_X, W)
 872
 873            # Apply activation
 874            if self.activation_name == "relu":
 875                Phi_X = jax.nn.relu(linear_output)
 876            elif self.activation_name == "tanh":
 877                Phi_X = jnp.tanh(linear_output)
 878            elif self.activation_name == "sigmoid":
 879                Phi_X = jax.nn.sigmoid(linear_output)
 880            else:  # leaky relu
 881                Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 882
 883            # Apply dropout
 884            if self.dropout > 0:
 885                key, subkey = jax.random.split(key)
 886                mask = jax.random.bernoulli(
 887                    subkey, p=1 - self.dropout, shape=Phi_X.shape
 888                )
 889                Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0)
 890
 891            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
 892        else:
 893            Z = augmented_X
 894
 895        # Final scaling
 896        scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale(
 897            Z,
 898            getattr(self, "scale_mean_", None),
 899            getattr(self, "scale_std_", None),
 900        )
 901
 902        # Center response for regression
 903        if not hasattr(mx, "is_factor") or not mx.is_factor(
 904            y
 905        ):  # regression case
 906            self.y_mean_ = float(
 907                jnp.mean(y)
 908            )  # Convert to Python float for compatibility
 909            centered_y = y - self.y_mean_
 910        else:
 911            centered_y = y
 912
 913        # Handle row sampling
 914        if self.row_sample < 1:
 915            key, subkey = jax.random.split(key)
 916            n_samples = Z.shape[0]
 917            n_row_sample = int(jnp.ceil(n_samples * self.row_sample))
 918            index_row = jax.random.choice(
 919                subkey, n_samples, shape=(n_row_sample,), replace=False
 920            )
 921            self.index_row_ = np.array(
 922                index_row
 923            )  # Store as numpy for original methods
 924            return (centered_y[index_row], scaled_Z[index_row])
 925
 926        return (centered_y, scaled_Z)
 927
 928    def cook_test_set_jax(self, X, **kwargs):
 929        """JAX-compatible test set processing with matching dimension handling."""
 930        X = jnp.asarray(X)
 931
 932        if len(X.shape) == 1:
 933            X = X.reshape(1, -1)
 934
 935        # Handle column sampling
 936        input_X = (
 937            X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)]
 938        )
 939
 940        augmented_X = input_X
 941
 942        # JAX-compatible scaling
 943        scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10)
 944
 945        # Process hidden layer if needed
 946        if self.n_hidden_features > 0:
 947            Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_))
 948            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
 949        else:
 950            Z = augmented_X
 951
 952        # Final scaling
 953        scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10)
 954
 955        return scaled_Z
 956
 957    def _jax_create_layer(self, X, W):
 958        """JAX-compatible hidden layer creation."""
 959        # print("X", X.shape)
 960        # print("W", W.shape)
 961        # print("self.W_", self.W_.shape)
 962        linear_output = jnp.dot(X, W)
 963
 964        if self.activation_name == "relu":
 965            return jax.nn.relu(linear_output)
 966        elif self.activation_name == "tanh":
 967            return jnp.tanh(linear_output)
 968        elif self.activation_name == "sigmoid":
 969            return jax.nn.sigmoid(linear_output)
 970        else:  # leaky relu
 971            return jax.nn.leaky_relu(linear_output, negative_slope=self.a)
 972
 973    def cross_val_score(
 974        self,
 975        X,
 976        y,
 977        cv=5,
 978        scoring="accuracy",
 979        random_state=42,
 980        n_jobs=-1,
 981        epsilon=0.5,
 982        penalized=True,
 983        objective="abs",
 984        **kwargs
 985    ):
 986        """
 987        Penalized Cross-validation score for a model.
 988
 989        Parameters:
 990
 991            X: {array-like}, shape = [n_samples, n_features]
 992                Training vectors, where n_samples is the number
 993                of samples and n_features is the number of features
 994
 995            y: array-like, shape = [n_samples]
 996                Target values
 997
 998            X_test: {array-like}, shape = [n_samples, n_features]
 999                Test vectors, where n_samples is the number
1000                of samples and n_features is the number of features
1001
1002            y_test: array-like, shape = [n_samples]
1003                Target values
1004
1005            cv: int
1006                Number of folds
1007
1008            scoring: str
1009                Scoring metric
1010
1011            random_state: int
1012                Random state
1013
1014            n_jobs: int
1015                Number of jobs to run in parallel
1016
1017            epsilon: float
1018                Penalty parameter
1019
1020            penalized: bool
1021                Whether to obtain penalized cross-validation score or not
1022
1023            objective: str
1024                'abs': Minimize the absolute difference between cross-validation score and validation score
1025                'relative': Minimize the relative difference between cross-validation score and validation score
1026        Returns:
1027
1028            A namedtuple with the following fields:
1029                - cv_score: float
1030                    cross-validation score
1031                - val_score: float
1032                    validation score
1033                - penalized_score: float
1034                    penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score)
1035                    If higher scoring metric is better, minimize the function result.
1036                    If lower scoring metric is better, maximize the function result.
1037        """
1038        if scoring == "accuracy":
1039            scoring_func = accuracy_score
1040        elif scoring == "balanced_accuracy":
1041            scoring_func = balanced_accuracy_score
1042        elif scoring == "f1":
1043            scoring_func = f1_score
1044        elif scoring == "roc_auc":
1045            scoring_func = roc_auc_score
1046        elif scoring == "r2":
1047            scoring_func = r2_score
1048        elif scoring == "mse":
1049            scoring_func = mean_squared_error
1050        elif scoring == "mae":
1051            scoring_func = mean_absolute_error
1052        elif scoring == "mape":
1053            scoring_func = mean_absolute_percentage_error
1054        elif scoring == "rmse":
1055
1056            def scoring_func(y_true, y_pred):
1057                return np.sqrt(mean_squared_error(y_true, y_pred))
1058
1059        X_train, X_val, y_train, y_val = train_test_split(
1060            X, y, test_size=0.2, random_state=random_state
1061        )
1062
1063        res = cross_val_score(
1064            self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs
1065        )  # cross-validation error
1066
1067        if penalized == False:
1068            return res
1069
1070        DescribeResult = namedtuple(
1071            "DescribeResult", ["cv_score", "val_score", "penalized_score"]
1072        )
1073
1074        numerator = res.mean()
1075
1076        # Evaluate on the (cv+1)-th fold
1077        preds_val = self.fit(X_train, y_train).predict(X_val)
1078        try:
1079            denominator = scoring(y_val, preds_val)  # validation error
1080        except Exception as e:
1081            denominator = scoring_func(y_val, preds_val)
1082
1083        # if higher is better
1084        if objective == "abs":
1085            penalized_score = np.abs(numerator - denominator) + epsilon * (
1086                1 / denominator + 1 / numerator
1087            )
1088        elif objective == "relative":
1089            ratio = numerator / denominator
1090            penalized_score = np.abs(ratio - 1) + epsilon * (
1091                1 / denominator + 1 / numerator
1092            )
1093
1094        return DescribeResult(
1095            cv_score=numerator,
1096            val_score=denominator,
1097            penalized_score=penalized_score,
1098        )

Base model from which all the other classes inherit.

This class contains the most important data preprocessing/feature engineering methods.

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std'), MinMax scaling ('minmax'), robust scaling ('robust') or max absolute scaling ('maxabs')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
232    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
233        """Create new covariates with kmeans or GMM clustering
234
235        Parameters:
236
237            X: {array-like}, shape = [n_samples, n_features]
238                Training vectors, where n_samples is the number
239                of samples and n_features is the number of features.
240
241            predict: boolean
242                is False on training set and True on test set
243
244            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
245                if scaler has already been fitted on training data (online training), it can be passed here
246
247            **kwargs:
248                additional parameters to be passed to the
249                clustering method
250
251        Returns:
252
253            Clusters' matrix, one-hot encoded: {array-like}
254
255        """
256
257        np.random.seed(self.seed)
258
259        if X is None:
260            X = self.X_
261
262        if isinstance(X, pd.DataFrame):
263            X = copy.deepcopy(X.values.astype(float))
264
265        if len(X.shape) == 1:
266            X = X.reshape(1, -1)
267
268        if predict is False:  # encode training set
269            # scale input data before clustering
270            self.clustering_scaler_, scaled_X = mo.scale_covariates(
271                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
272            )
273
274            self.clustering_obj_, X_clustered = mo.cluster_covariates(
275                scaled_X,
276                self.n_clusters,
277                self.seed,
278                type_clust=self.type_clust,
279                **kwargs
280            )
281
282            if self.cluster_encode:
283                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
284                    np.float16
285                )
286
287            return X_clustered.astype(np.float16)
288
289        # if predict == True, encode test set
290        X_clustered = self.clustering_obj_.predict(
291            self.clustering_scaler_.transform(X)
292        )
293
294        if self.cluster_encode == True:
295            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
296                np.float16
297            )
298
299        return X_clustered.astype(np.float16)

Create new covariates with kmeans or GMM clustering

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

predict: boolean
    is False on training set and True on test set

scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
    if scaler has already been fitted on training data (online training), it can be passed here

**kwargs:
    additional parameters to be passed to the
    clustering method

Returns:

Clusters' matrix, one-hot encoded: {array-like}
def create_layer(self, scaled_X, W=None):
301    def create_layer(self, scaled_X, W=None):
302        """Create hidden layer.
303
304        Parameters:
305
306            scaled_X: {array-like}, shape = [n_samples, n_features]
307                Training vectors, where n_samples is the number
308                of samples and n_features is the number of features
309
310            W: {array-like}, shape = [n_features, hidden_features]
311                if provided, constructs the hidden layer with W; otherwise computed internally
312
313        Returns:
314
315            Hidden layer matrix: {array-like}
316
317        """
318
319        n_features = scaled_X.shape[1]
320
321        # hash_sim = {
322        #         "sobol": generate_sobol,
323        #         "hammersley": generate_hammersley,
324        #         "uniform": generate_uniform,
325        #         "halton": generate_halton
326        #     }
327
328        if self.bias is False:  # no bias term in the hidden layer
329            if W is None:
330                if self.nodes_sim == "sobol":
331                    self.W_ = generate_sobol(
332                        n_dims=n_features,
333                        n_points=self.n_hidden_features,
334                        seed=self.seed,
335                    )
336                elif self.nodes_sim == "hammersley":
337                    self.W_ = generate_hammersley(
338                        n_dims=n_features,
339                        n_points=self.n_hidden_features,
340                        seed=self.seed,
341                    )
342                elif self.nodes_sim == "uniform":
343                    self.W_ = generate_uniform(
344                        n_dims=n_features,
345                        n_points=self.n_hidden_features,
346                        seed=self.seed,
347                    )
348                else:
349                    self.W_ = generate_halton(
350                        n_dims=n_features,
351                        n_points=self.n_hidden_features,
352                        seed=self.seed,
353                    )
354
355                assert (
356                    scaled_X.shape[1] == self.W_.shape[0]
357                ), "check dimensions of covariates X and matrix W"
358
359                return mo.dropout(
360                    x=self.activation_func(
361                        mo.safe_sparse_dot(
362                            a=scaled_X, b=self.W_, backend=self.backend
363                        )
364                    ),
365                    drop_prob=self.dropout,
366                    seed=self.seed,
367                )
368
369            # W is not none
370            assert (
371                scaled_X.shape[1] == W.shape[0]
372            ), "check dimensions of covariates X and matrix W"
373
374            # self.W_ = W
375            return mo.dropout(
376                x=self.activation_func(
377                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
378                ),
379                drop_prob=self.dropout,
380                seed=self.seed,
381            )
382
383        # with bias term in the hidden layer
384        if W is None:
385            n_features_1 = n_features + 1
386
387            if self.nodes_sim == "sobol":
388                self.W_ = generate_sobol(
389                    n_dims=n_features_1,
390                    n_points=self.n_hidden_features,
391                    seed=self.seed,
392                )
393            elif self.nodes_sim == "hammersley":
394                self.W_ = generate_hammersley(
395                    n_dims=n_features_1,
396                    n_points=self.n_hidden_features,
397                    seed=self.seed,
398                )
399            elif self.nodes_sim == "uniform":
400                self.W_ = generate_uniform(
401                    n_dims=n_features_1,
402                    n_points=self.n_hidden_features,
403                    seed=self.seed,
404                )
405            else:
406                self.W_ = generate_halton(
407                    n_dims=n_features_1,
408                    n_points=self.n_hidden_features,
409                    seed=self.seed,
410                )
411
412            # self.W_ = hash_sim[self.nodes_sim](
413            #         n_dims=n_features_1,
414            #         n_points=self.n_hidden_features,
415            #         seed=self.seed,
416            #     )
417
418            return mo.dropout(
419                x=self.activation_func(
420                    mo.safe_sparse_dot(
421                        a=mo.cbind(
422                            np.ones(scaled_X.shape[0]),
423                            scaled_X,
424                            backend=self.backend,
425                        ),
426                        b=self.W_,
427                        backend=self.backend,
428                    )
429                ),
430                drop_prob=self.dropout,
431                seed=self.seed,
432            )
433
434        # W is not None
435        # self.W_ = W
436        return mo.dropout(
437            x=self.activation_func(
438                mo.safe_sparse_dot(
439                    a=mo.cbind(
440                        np.ones(scaled_X.shape[0]),
441                        scaled_X,
442                        backend=self.backend,
443                    ),
444                    b=W,
445                    backend=self.backend,
446                )
447            ),
448            drop_prob=self.dropout,
449            seed=self.seed,
450        )

Create hidden layer.

Parameters:

scaled_X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer with W; otherwise computed internally

Returns:

Hidden layer matrix: {array-like}
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
526    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
527        """Create new hidden features for training set, with hidden layer, center the response.
528
529        Parameters:
530
531            y: array-like, shape = [n_samples]
532                Target values
533
534            X: {array-like}, shape = [n_samples, n_features]
535                Training vectors, where n_samples is the number
536                of samples and n_features is the number of features
537
538            W: {array-like}, shape = [n_features, hidden_features]
539                if provided, constructs the hidden layer via W
540
541        Returns:
542
543            (centered response, direct link + hidden layer matrix): {tuple}
544
545        """
546
547        # either X and y are stored or not
548        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
549        if self.n_hidden_features > 0:  # has a hidden layer
550            assert (
551                len(self.type_scaling) >= 2
552            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"
553
554        if X is None:
555            if self.col_sample == 1:
556                input_X = self.X_
557            else:
558                n_features = self.X_.shape[1]
559                new_n_features = int(np.ceil(n_features * self.col_sample))
560                assert (
561                    new_n_features >= 1
562                ), "check class attribute 'col_sample' and the number of covariates provided for X"
563                np.random.seed(self.seed)
564                index_col = np.random.choice(
565                    range(n_features), size=new_n_features, replace=False
566                )
567                self.index_col_ = index_col
568                input_X = self.X_[:, self.index_col_]
569
570        else:  # X is not None # keep X vs self.X_
571            if isinstance(X, pd.DataFrame):
572                X = copy.deepcopy(X.values.astype(float))
573
574            if self.col_sample == 1:
575                input_X = X
576            else:
577                n_features = X.shape[1]
578                new_n_features = int(np.ceil(n_features * self.col_sample))
579                assert (
580                    new_n_features >= 1
581                ), "check class attribute 'col_sample' and the number of covariates provided for X"
582                np.random.seed(self.seed)
583                index_col = np.random.choice(
584                    range(n_features), size=new_n_features, replace=False
585                )
586                self.index_col_ = index_col
587                input_X = X[:, self.index_col_]
588
589        if self.n_clusters <= 0:
590            # data without any clustering: self.n_clusters is None -----
591
592            if self.n_hidden_features > 0:  # with hidden layer
593                self.nn_scaler_, scaled_X = mo.scale_covariates(
594                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
595                )
596                Phi_X = (
597                    self.create_layer(scaled_X)
598                    if W is None
599                    else self.create_layer(scaled_X, W=W)
600                )
601                Z = (
602                    mo.cbind(input_X, Phi_X, backend=self.backend)
603                    if self.direct_link is True
604                    else Phi_X
605                )
606                self.scaler_, scaled_Z = mo.scale_covariates(
607                    Z, choice=self.type_scaling[0], scaler=self.scaler_
608                )
609            else:  # no hidden layer
610                Z = input_X
611                self.scaler_, scaled_Z = mo.scale_covariates(
612                    Z, choice=self.type_scaling[0], scaler=self.scaler_
613                )
614
615        else:
616            # data with clustering: self.n_clusters is not None ----- # keep
617
618            augmented_X = mo.cbind(
619                input_X,
620                self.encode_clusters(input_X, **kwargs),
621                backend=self.backend,
622            )
623
624            if self.n_hidden_features > 0:  # with hidden layer
625                self.nn_scaler_, scaled_X = mo.scale_covariates(
626                    augmented_X,
627                    choice=self.type_scaling[1],
628                    scaler=self.nn_scaler_,
629                )
630                Phi_X = (
631                    self.create_layer(scaled_X)
632                    if W is None
633                    else self.create_layer(scaled_X, W=W)
634                )
635                Z = (
636                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
637                    if self.direct_link is True
638                    else Phi_X
639                )
640                self.scaler_, scaled_Z = mo.scale_covariates(
641                    Z, choice=self.type_scaling[0], scaler=self.scaler_
642                )
643            else:  # no hidden layer
644                Z = augmented_X
645                self.scaler_, scaled_Z = mo.scale_covariates(
646                    Z, choice=self.type_scaling[0], scaler=self.scaler_
647                )
648
649        # Returning model inputs -----
650        if mx.is_factor(y) is False:  # regression
651            # center y
652            if y is None:
653                self.y_mean_, centered_y = mo.center_response(self.y_)
654            else:
655                self.y_mean_, centered_y = mo.center_response(y)
656
657            # y is subsampled
658            if self.row_sample < 1:
659                n, p = Z.shape
660
661                self.subsampler_ = (
662                    SubSampler(
663                        y=self.y_, row_sample=self.row_sample, seed=self.seed
664                    )
665                    if y is None
666                    else SubSampler(
667                        y=y, row_sample=self.row_sample, seed=self.seed
668                    )
669                )
670
671                self.index_row_ = self.subsampler_.subsample()
672
673                n_row_sample = len(self.index_row_)
674                # regression
675                return (
676                    centered_y[self.index_row_].reshape(n_row_sample),
677                    self.scaler_.transform(
678                        Z[self.index_row_, :].reshape(n_row_sample, p)
679                    ),
680                )
681            # y is not subsampled
682            # regression
683            return (centered_y, self.scaler_.transform(Z))
684
685        # classification
686        # y is subsampled
687        if self.row_sample < 1:
688            n, p = Z.shape
689
690            self.subsampler_ = (
691                SubSampler(
692                    y=self.y_, row_sample=self.row_sample, seed=self.seed
693                )
694                if y is None
695                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
696            )
697
698            self.index_row_ = self.subsampler_.subsample()
699
700            n_row_sample = len(self.index_row_)
701            # classification
702            return (
703                y[self.index_row_].reshape(n_row_sample),
704                self.scaler_.transform(
705                    Z[self.index_row_, :].reshape(n_row_sample, p)
706                ),
707            )
708        # y is not subsampled
709        # classification
710        return (y, self.scaler_.transform(Z))

Create new hidden features for training set, with hidden layer, center the response.

Parameters:

y: array-like, shape = [n_samples]
    Target values

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

W: {array-like}, shape = [n_features, hidden_features]
    if provided, constructs the hidden layer via W

Returns:

(centered response, direct link + hidden layer matrix): {tuple}
def cook_test_set(self, X, **kwargs):
712    def cook_test_set(self, X, **kwargs):
713        """Transform data from test set, with hidden layer.
714
715        Parameters:
716
717            X: {array-like}, shape = [n_samples, n_features]
718                Training vectors, where n_samples is the number
719                of samples and n_features is the number of features
720
721            **kwargs: additional parameters to be passed to self.encode_cluster
722
723        Returns:
724
725            Transformed test set : {array-like}
726        """
727
728        if isinstance(X, pd.DataFrame):
729            X = copy.deepcopy(X.values.astype(float))
730
731        if len(X.shape) == 1:
732            X = X.reshape(1, -1)
733
734        if (
735            self.n_clusters == 0
736        ):  # data without clustering: self.n_clusters is None -----
737            if self.n_hidden_features > 0:
738                # if hidden layer
739                scaled_X = (
740                    self.nn_scaler_.transform(X)
741                    if (self.col_sample == 1)
742                    else self.nn_scaler_.transform(X[:, self.index_col_])
743                )
744                Phi_X = self.create_layer(scaled_X, self.W_)
745                if self.direct_link:
746                    return self.scaler_.transform(
747                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
748                    )
749                # when self.direct_link == False
750                return self.scaler_.transform(Phi_X)
751            # if no hidden layer # self.n_hidden_features == 0
752            return self.scaler_.transform(X)
753
754        # data with clustering: self.n_clusters > 0 -----
755        if self.col_sample == 1:
756            predicted_clusters = self.encode_clusters(
757                X=X, predict=True, **kwargs
758            )
759            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
760        else:
761            predicted_clusters = self.encode_clusters(
762                X=X[:, self.index_col_], predict=True, **kwargs
763            )
764            augmented_X = mo.cbind(
765                X[:, self.index_col_], predicted_clusters, backend=self.backend
766            )
767
768        if self.n_hidden_features > 0:  # if hidden layer
769            scaled_X = self.nn_scaler_.transform(augmented_X)
770            Phi_X = self.create_layer(scaled_X, self.W_)
771            if self.direct_link:
772                return self.scaler_.transform(
773                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
774                )
775            return self.scaler_.transform(Phi_X)
776
777        # if no hidden layer
778        return self.scaler_.transform(augmented_X)

Transform data from test set, with hidden layer.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.encode_clusters

Returns:

Transformed test set : {array-like}
class BaseRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BaseRegressor(Base, RegressorMixin):
 16    """Random Vector Functional Link Network regression without shrinkage
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
 31            'uniform'
 32
 33        bias: boolean
 34            indicates if the hidden layer contains a bias term (True) or
 35            not (False)
 36
 37        dropout: float
 38            regularization parameter; (random) percentage of nodes dropped out
 39            of the training
 40
 41        direct_link: boolean
 42            indicates if the original features are included (True) in model's
 43            fitting or not (False)
 44
 45        n_clusters: int
 46            number of clusters for type_clust='kmeans' or type_clust='gmm'
 47            clustering (could be 0: no clustering)
 48
 49        cluster_encode: bool
 50            defines how the variable containing clusters is treated (default is one-hot);
 51            if `False`, then labels are used, without one-hot encoding
 52
 53        type_clust: str
 54            type of clustering method: currently k-means ('kmeans') or Gaussian
 55            Mixture Model ('gmm')
 56
 57        type_scaling: a tuple of 3 strings
 58            scaling methods for inputs, hidden layer, and clustering respectively
 59            (and when relevant).
 60            Currently available: standardization ('std') or MinMax scaling ('minmax')
 61
 62        col_sample: float
 63            percentage of features randomly chosen for training
 64
 65        row_sample: float
 66            percentage of rows chosen for training, by stratified bootstrapping
 67
 68        seed: int
 69            reproducibility seed for nodes_sim=='uniform', clustering and dropout
 70
 71        backend: str
 72            "cpu" or "gpu" or "tpu"
 73
 74    Attributes:
 75
 76        beta_: vector
 77            regression coefficients
 78
 79        GCV_: float
 80            Generalized Cross-Validation error
 81
 82    """
 83
 84    # construct the object -----
 85
 86    def __init__(
 87        self,
 88        n_hidden_features=5,
 89        activation_name="relu",
 90        a=0.01,
 91        nodes_sim="sobol",
 92        bias=True,
 93        dropout=0,
 94        direct_link=True,
 95        n_clusters=2,
 96        cluster_encode=True,
 97        type_clust="kmeans",
 98        type_scaling=("std", "std", "std"),
 99        col_sample=1,
100        row_sample=1,
101        seed=123,
102        backend="cpu",
103    ):
104        super().__init__(
105            n_hidden_features=n_hidden_features,
106            activation_name=activation_name,
107            a=a,
108            nodes_sim=nodes_sim,
109            bias=bias,
110            dropout=dropout,
111            direct_link=direct_link,
112            n_clusters=n_clusters,
113            cluster_encode=cluster_encode,
114            type_clust=type_clust,
115            type_scaling=type_scaling,
116            col_sample=col_sample,
117            row_sample=row_sample,
118            seed=seed,
119            backend=backend,
120        )
121
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(
144            X=scaled_Z, y=centered_y, backend=self.backend
145        )
146
147        self.beta_ = fit_obj["beta_hat"]
148
149        self.GCV_ = fit_obj["GCV"]
150
151        return self
152
153    def predict(self, X, **kwargs):
154        """Predict test data X.
155
156        Parameters:
157
158            X: {array-like}, shape = [n_samples, n_features]
159                Training vectors, where n_samples is the number
160                of samples and n_features is the number of features
161
162            **kwargs: additional parameters to be passed to self.cook_test_set
163
164        Returns:
165
166            model predictions: {array-like}
167        """
168
169        if len(X.shape) == 1:
170            n_features = X.shape[0]
171            new_X = mo.rbind(
172                X.reshape(1, n_features),
173                np.ones(n_features).reshape(1, n_features),
174            )
175
176            return (
177                self.y_mean_
178                + mo.safe_sparse_dot(
179                    a=self.cook_test_set(new_X, **kwargs),
180                    b=self.beta_,
181                    backend=self.backend,
182                )
183            )[0]
184
185        return self.y_mean_ + mo.safe_sparse_dot(
186            a=self.cook_test_set(X, **kwargs),
187            b=self.beta_,
188            backend=self.backend,
189        )

Random Vector Functional Link Network regression without shrinkage

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or
    not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for type_clust='kmeans' or type_clust='gmm'
    clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of features randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform', clustering and dropout

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: vector
    regression coefficients

GCV_: float
    Generalized Cross-Validation error
def fit(self, X, y, **kwargs):
122    def fit(self, X, y, **kwargs):
123        """Fit BaseRegressor to training data (X, y)
124
125        Parameters:
126
127            X: {array-like}, shape = [n_samples, n_features]
128                Training vectors, where n_samples is the number
129                of samples and n_features is the number of features
130
131            y: array-like, shape = [n_samples]
132                Target values
133
134            **kwargs: additional parameters to be passed to self.cook_training_set
135
136        Returns:
137
138            self: object
139        """
140
141        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
142
143        fit_obj = lmf.beta_Sigma_hat(
144            X=scaled_Z, y=centered_y, backend=self.backend
145        )
146
147        self.beta_ = fit_obj["beta_hat"]
148
149        self.GCV_ = fit_obj["GCV"]
150
151        return self

Fit BaseRegressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to self.cook_training_set

Returns:

self: object
def predict(self, X, **kwargs):
153    def predict(self, X, **kwargs):
154        """Predict test data X.
155
156        Parameters:
157
158            X: {array-like}, shape = [n_samples, n_features]
159                Training vectors, where n_samples is the number
160                of samples and n_features is the number of features
161
162            **kwargs: additional parameters to be passed to self.cook_test_set
163
164        Returns:
165
166            model predictions: {array-like}
167        """
168
169        if len(X.shape) == 1:
170            n_features = X.shape[0]
171            new_X = mo.rbind(
172                X.reshape(1, n_features),
173                np.ones(n_features).reshape(1, n_features),
174            )
175
176            return (
177                self.y_mean_
178                + mo.safe_sparse_dot(
179                    a=self.cook_test_set(new_X, **kwargs),
180                    b=self.beta_,
181                    backend=self.backend,
182                )
183            )[0]
184
185        return self.y_mean_ + mo.safe_sparse_dot(
186            a=self.cook_test_set(X, **kwargs),
187            b=self.beta_,
188            backend=self.backend,
189        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features

**kwargs: additional parameters to be passed to self.cook_test_set

Returns:

model predictions: {array-like}
class BayesianRVFLRegressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFLRegressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with one prior
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model''s fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s: float
 61            std. dev. of regression parameters in Bayesian Ridge Regression
 62
 63        sigma: float
 64            std. dev. of residuals in Bayesian Ridge Regression
 65
 66        return_std: boolean
 67            if True, uncertainty around predictions is evaluated
 68
 69        backend: str
 70            "cpu" or "gpu" or "tpu"
 71
 72    Attributes:
 73
 74        beta_: array-like
 75            regression''s coefficients
 76
 77        Sigma_: array-like
 78            covariance of the distribution of fitted parameters
 79
 80        GCV_: float
 81            Generalized cross-validation error
 82
 83        y_mean_: float
 84            average response
 85
 86    Examples:
 87
 88    ```python
 89    TBD
 90    ```
 91
 92    """
 93
 94    # construct the object -----
 95
 96    def __init__(
 97        self,
 98        n_hidden_features=5,
 99        activation_name="relu",
100        a=0.01,
101        nodes_sim="sobol",
102        bias=True,
103        dropout=0,
104        direct_link=True,
105        n_clusters=2,
106        cluster_encode=True,
107        type_clust="kmeans",
108        type_scaling=("std", "std", "std"),
109        seed=123,
110        s=0.1,
111        sigma=0.05,
112        return_std=True,
113        backend="cpu",
114    ):
115        super().__init__(
116            n_hidden_features=n_hidden_features,
117            activation_name=activation_name,
118            a=a,
119            nodes_sim=nodes_sim,
120            bias=bias,
121            dropout=dropout,
122            direct_link=direct_link,
123            n_clusters=n_clusters,
124            cluster_encode=cluster_encode,
125            type_clust=type_clust,
126            type_scaling=type_scaling,
127            seed=seed,
128            backend=backend,
129        )
130        self.s = s
131        self.sigma = sigma
132        self.beta_ = None
133        self.Sigma_ = None
134        self.GCV_ = None
135        self.return_std = return_std
136
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self
178
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Training vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with one prior

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s: float
    std. dev. of regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
def fit(self, X, y, **kwargs):
137    def fit(self, X, y, **kwargs):
138        """Fit BayesianRVFLRegressor to training data (X, y).
139
140        Parameters:
141
142            X: {array-like}, shape = [n_samples, n_features]
143                Training vectors, where n_samples is the number
144                of samples and n_features is the number of features.
145
146            y: array-like, shape = [n_samples]
147                Target values.
148
149            **kwargs: additional parameters to be passed to
150                    self.cook_training_set
151
152        Returns:
153
154            self: object
155
156        """
157
158        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
159
160        fit_obj = lmf.beta_Sigma_hat_rvfl(
161            X=scaled_Z,
162            y=centered_y,
163            s=self.s,
164            sigma=self.sigma,
165            fit_intercept=False,
166            return_cov=self.return_std,
167            backend=self.backend,
168        )
169
170        self.beta_ = fit_obj["beta_hat"]
171
172        if self.return_std == True:
173            self.Sigma_ = fit_obj["Sigma_hat"]
174
175        self.GCV_ = fit_obj["GCV"]
176
177        return self

Fit BayesianRVFLRegressor to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):
179    def predict(self, X, return_std=False, **kwargs):
180        """Predict test data X.
181
182        Parameters:
183
184            X: {array-like}, shape = [n_samples, n_features]
185                Training vectors, where n_samples is the number
186                of samples and n_features is the number of features.
187
188            return_std: {boolean}, standard dev. is returned or not
189
190            **kwargs: additional parameters to be passed to
191                    self.cook_test_set
192
193        Returns:
194
195            model predictions: {array-like}
196
197        """
198
199        if len(X.shape) == 1:  # one observation in the test set only
200            n_features = X.shape[0]
201            new_X = mo.rbind(
202                x=X.reshape(1, n_features),
203                y=np.ones(n_features).reshape(1, n_features),
204                backend=self.backend,
205            )
206
207        self.return_std = return_std
208
209        if self.return_std == False:
210            if len(X.shape) == 1:
211                return (
212                    self.y_mean_
213                    + mo.safe_sparse_dot(
214                        a=self.cook_test_set(new_X, **kwargs),
215                        b=self.beta_,
216                        backend=self.backend,
217                    )
218                )[0]
219
220            return self.y_mean_ + mo.safe_sparse_dot(
221                a=self.cook_test_set(X, **kwargs),
222                b=self.beta_,
223                backend=self.backend,
224            )
225
226        else:  # confidence interval required for preds?
227            if len(X.shape) == 1:
228                Z = self.cook_test_set(new_X, **kwargs)
229
230                pred_obj = lmf.beta_Sigma_hat_rvfl(
231                    s=self.s,
232                    sigma=self.sigma,
233                    X_star=Z,
234                    return_cov=True,
235                    beta_hat_=self.beta_,
236                    Sigma_hat_=self.Sigma_,
237                    backend=self.backend,
238                )
239
240                return (
241                    self.y_mean_ + pred_obj["preds"][0],
242                    pred_obj["preds_std"][0],
243                )
244
245            Z = self.cook_test_set(X, **kwargs)
246
247            pred_obj = lmf.beta_Sigma_hat_rvfl(
248                s=self.s,
249                sigma=self.sigma,
250                X_star=Z,
251                return_cov=True,
252                beta_hat_=self.beta_,
253                Sigma_hat_=self.Sigma_,
254                backend=self.backend,
255            )
256
257            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
class BayesianRVFL2Regressor(nnetsauce.Base, sklearn.base.RegressorMixin):
 15class BayesianRVFL2Regressor(Base, RegressorMixin):
 16    """Bayesian Random Vector Functional Link Network regression with two priors
 17
 18    Parameters:
 19
 20        n_hidden_features: int
 21            number of nodes in the hidden layer
 22
 23        activation_name: str
 24            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 25
 26        a: float
 27            hyperparameter for 'prelu' or 'elu' activation function
 28
 29        nodes_sim: str
 30            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
 31
 32        bias: boolean
 33            indicates if the hidden layer contains a bias term (True) or not (False)
 34
 35        dropout: float
 36            regularization parameter; (random) percentage of nodes dropped out
 37            of the training
 38
 39        direct_link: boolean
 40            indicates if the original features are included (True) in model''s fitting or not (False)
 41
 42        n_clusters: int
 43            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
 44
 45        cluster_encode: bool
 46            defines how the variable containing clusters is treated (default is one-hot)
 47            if `False`, then labels are used, without one-hot encoding
 48
 49        type_clust: str
 50            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
 51
 52        type_scaling: a tuple of 3 strings
 53            scaling methods for inputs, hidden layer, and clustering respectively
 54            (and when relevant).
 55            Currently available: standardization ('std') or MinMax scaling ('minmax')
 56
 57        seed: int
 58            reproducibility seed for nodes_sim=='uniform'
 59
 60        s1: float
 61            std. dev. of init. regression parameters in Bayesian Ridge Regression
 62
 63        s2: float
 64            std. dev. of augmented regression parameters in Bayesian Ridge Regression
 65
 66        sigma: float
 67            std. dev. of residuals in Bayesian Ridge Regression
 68
 69        return_std: boolean
 70            if True, uncertainty around predictions is evaluated
 71
 72        backend: str
 73            "cpu" or "gpu" or "tpu"
 74
 75    Attributes:
 76
 77        beta_: array-like
 78            regression''s coefficients
 79
 80        Sigma_: array-like
 81            covariance of the distribution of fitted parameters
 82
 83        GCV_: float
 84            Generalized cross-validation error
 85
 86        y_mean_: float
 87            average response
 88
 89    Examples:
 90
 91    ```python
 92    TBD
 93    ```
 94
 95    """
 96
 97    # construct the object -----
 98
 99    def __init__(
100        self,
101        n_hidden_features=5,
102        activation_name="relu",
103        a=0.01,
104        nodes_sim="sobol",
105        bias=True,
106        dropout=0,
107        direct_link=True,
108        n_clusters=0,
109        cluster_encode=True,
110        type_clust="kmeans",
111        type_scaling=("std", "std", "std"),
112        seed=123,
113        s1=0.1,
114        s2=0.1,
115        sigma=0.05,
116        return_std=True,
117        backend="cpu",
118    ):
119        super().__init__(
120            n_hidden_features=n_hidden_features,
121            activation_name=activation_name,
122            a=a,
123            nodes_sim=nodes_sim,
124            bias=bias,
125            dropout=dropout,
126            direct_link=direct_link,
127            n_clusters=n_clusters,
128            cluster_encode=cluster_encode,
129            type_clust=type_clust,
130            type_scaling=type_scaling,
131            seed=seed,
132            backend=backend,
133        )
134
135        self.s1 = s1
136        self.s2 = s2
137        self.sigma = sigma
138        self.beta_ = None
139        self.Sigma_ = None
140        self.GCV_ = None
141        self.return_std = return_std
142        self.coef_ = None
143
144    def fit(self, X, y, **kwargs):
145        """Fit BayesianRVFL2Regressor to training data (X, y)
146
147        Parameters:
148
149            X: {array-like}, shape = [n_samples, n_features]
150                Training vectors, where n_samples is the number
151                of samples and n_features is the number of features
152
153            y: array-like, shape = [n_samples]
154                Target values
155
156            **kwargs: additional parameters to be passed to
157                    self.cook_training_set
158
159        Returns:
160
161            self: object
162
163        """
164
165        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
166
167        n, p = X.shape
168        q = self.n_hidden_features
169
170        if self.direct_link == True:
171            r = p + self.n_clusters
172
173            block11 = (self.s1**2) * np.eye(r)
174            block12 = np.zeros((r, q))
175            block21 = np.zeros((q, r))
176            block22 = (self.s2**2) * np.eye(q)
177
178            Sigma_prior = mo.rbind(
179                x=mo.cbind(x=block11, y=block12, backend=self.backend),
180                y=mo.cbind(x=block21, y=block22, backend=self.backend),
181                backend=self.backend,
182            )
183
184        else:
185            Sigma_prior = (self.s2**2) * np.eye(q)
186
187        fit_obj = lmf.beta_Sigma_hat_rvfl2(
188            X=scaled_Z,
189            y=centered_y,
190            Sigma=Sigma_prior,
191            sigma=self.sigma,
192            fit_intercept=False,
193            return_cov=self.return_std,
194            backend=self.backend,
195        )
196
197        self.beta_ = fit_obj["beta_hat"]
198
199        self.coef_ = self.beta_
200
201        if self.return_std == True:
202            self.Sigma_ = fit_obj["Sigma_hat"]
203
204        self.GCV_ = fit_obj["GCV"]
205
206        return self
207
208    def predict(self, X, return_std=False, **kwargs):
209        """Predict test data X.
210
211        Parameters:
212
213            X: {array-like}, shape = [n_samples, n_features]
214                Training vectors, where n_samples is the number
215                of samples and n_features is the number of features.
216
217            return_std: {boolean}, standard dev. is returned or not
218
219            **kwargs: additional parameters to be passed to
220                    self.cook_test_set
221
222        Returns:
223
224            model predictions: {array-like}
225
226        """
227
228        if len(X.shape) == 1:  # one observation in the test set only
229            n_features = X.shape[0]
230            new_X = mo.rbind(
231                x=X.reshape(1, n_features),
232                y=np.ones(n_features).reshape(1, n_features),
233                backend=self.backend,
234            )
235
236        self.return_std = return_std
237
238        if self.return_std == False:
239            if len(X.shape) == 1:
240                return (
241                    self.y_mean_
242                    + mo.safe_sparse_dot(
243                        self.cook_test_set(new_X, **kwargs),
244                        self.beta_,
245                        backend=self.backend,
246                    )
247                )[0]
248
249            return self.y_mean_ + mo.safe_sparse_dot(
250                self.cook_test_set(X, **kwargs),
251                self.beta_,
252                backend=self.backend,
253            )
254
255        else:  # confidence interval required for preds?
256            if len(X.shape) == 1:
257                Z = self.cook_test_set(new_X, **kwargs)
258
259                pred_obj = lmf.beta_Sigma_hat_rvfl2(
260                    X_star=Z,
261                    return_cov=self.return_std,
262                    beta_hat_=self.beta_,
263                    Sigma_hat_=self.Sigma_,
264                    backend=self.backend,
265                )
266
267                return (
268                    self.y_mean_ + pred_obj["preds"][0],
269                    pred_obj["preds_std"][0],
270                )
271
272            Z = self.cook_test_set(X, **kwargs)
273
274            pred_obj = lmf.beta_Sigma_hat_rvfl2(
275                X_star=Z,
276                return_cov=self.return_std,
277                beta_hat_=self.beta_,
278                Sigma_hat_=self.Sigma_,
279                backend=self.backend,
280            )
281
282            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Bayesian Random Vector Functional Link Network regression with two priors

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original features are included (True) in model's fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

seed: int
    reproducibility seed for nodes_sim=='uniform'

s1: float
    std. dev. of init. regression parameters in Bayesian Ridge Regression

s2: float
    std. dev. of augmented regression parameters in Bayesian Ridge Regression

sigma: float
    std. dev. of residuals in Bayesian Ridge Regression

return_std: boolean
    if True, uncertainty around predictions is evaluated

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: array-like
    regression coefficients

Sigma_: array-like
    covariance of the distribution of fitted parameters

GCV_: float
    Generalized cross-validation error

y_mean_: float
    average response

Examples:

TBD
def fit(self, X, y, **kwargs):
144    def fit(self, X, y, **kwargs):
145        """Fit BayesianRVFL2Regressor to training data (X, y)
146
147        Parameters:
148
149            X: {array-like}, shape = [n_samples, n_features]
150                Training vectors, where n_samples is the number
151                of samples and n_features is the number of features
152
153            y: array-like, shape = [n_samples]
154                Target values
155
156            **kwargs: additional parameters to be passed to
157                    self.cook_training_set
158
159        Returns:
160
161            self: object
162
163        """
164
165        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
166
167        n, p = X.shape
168        q = self.n_hidden_features
169
170        if self.direct_link == True:
171            r = p + self.n_clusters
172
173            block11 = (self.s1**2) * np.eye(r)
174            block12 = np.zeros((r, q))
175            block21 = np.zeros((q, r))
176            block22 = (self.s2**2) * np.eye(q)
177
178            Sigma_prior = mo.rbind(
179                x=mo.cbind(x=block11, y=block12, backend=self.backend),
180                y=mo.cbind(x=block21, y=block22, backend=self.backend),
181                backend=self.backend,
182            )
183
184        else:
185            Sigma_prior = (self.s2**2) * np.eye(q)
186
187        fit_obj = lmf.beta_Sigma_hat_rvfl2(
188            X=scaled_Z,
189            y=centered_y,
190            Sigma=Sigma_prior,
191            sigma=self.sigma,
192            fit_intercept=False,
193            return_cov=self.return_std,
194            backend=self.backend,
195        )
196
197        self.beta_ = fit_obj["beta_hat"]
198
199        self.coef_ = self.beta_
200
201        if self.return_std == True:
202            self.Sigma_ = fit_obj["Sigma_hat"]
203
204        self.GCV_ = fit_obj["GCV"]
205
206        return self

Fit BayesianRVFL2Regressor to training data (X, y)

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features

y: array-like, shape = [n_samples]
    Target values

**kwargs: additional parameters to be passed to
        self.cook_training_set

Returns:

self: object
def predict(self, X, return_std=False, **kwargs):
208    def predict(self, X, return_std=False, **kwargs):
209        """Predict test data X.
210
211        Parameters:
212
213            X: {array-like}, shape = [n_samples, n_features]
214                Training vectors, where n_samples is the number
215                of samples and n_features is the number of features.
216
217            return_std: {boolean}, standard dev. is returned or not
218
219            **kwargs: additional parameters to be passed to
220                    self.cook_test_set
221
222        Returns:
223
224            model predictions: {array-like}
225
226        """
227
228        if len(X.shape) == 1:  # one observation in the test set only
229            n_features = X.shape[0]
230            new_X = mo.rbind(
231                x=X.reshape(1, n_features),
232                y=np.ones(n_features).reshape(1, n_features),
233                backend=self.backend,
234            )
235
236        self.return_std = return_std
237
238        if self.return_std == False:
239            if len(X.shape) == 1:
240                return (
241                    self.y_mean_
242                    + mo.safe_sparse_dot(
243                        self.cook_test_set(new_X, **kwargs),
244                        self.beta_,
245                        backend=self.backend,
246                    )
247                )[0]
248
249            return self.y_mean_ + mo.safe_sparse_dot(
250                self.cook_test_set(X, **kwargs),
251                self.beta_,
252                backend=self.backend,
253            )
254
255        else:  # confidence interval required for preds?
256            if len(X.shape) == 1:
257                Z = self.cook_test_set(new_X, **kwargs)
258
259                pred_obj = lmf.beta_Sigma_hat_rvfl2(
260                    X_star=Z,
261                    return_cov=self.return_std,
262                    beta_hat_=self.beta_,
263                    Sigma_hat_=self.Sigma_,
264                    backend=self.backend,
265                )
266
267                return (
268                    self.y_mean_ + pred_obj["preds"][0],
269                    pred_obj["preds_std"][0],
270                )
271
272            Z = self.cook_test_set(X, **kwargs)
273
274            pred_obj = lmf.beta_Sigma_hat_rvfl2(
275                X_star=Z,
276                return_cov=self.return_std,
277                beta_hat_=self.beta_,
278                Sigma_hat_=self.Sigma_,
279                backend=self.backend,
280            )
281
282            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_std: {boolean}, standard dev. is returned or not

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
class ClassicalMTS(nnetsauce.MTS):
 42class ClassicalMTS(MTS):
 43    """Time series with statistical models (statsmodels), mostly for benchmarks
 44
 45    Parameters:
 46
 47        model: type of model: str.
 48            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
 49            Default is None
 50
 51        obj: object
 52            A time series model from statsmodels
 53
 54    Attributes:
 55
 56        df_: data frame
 57            the input data frame, in case a data.frame is provided to `fit`
 58
 59        level_: int
 60            level of confidence for prediction intervals (default is 95)
 61
 62    Examples:
 63    See examples/classical_mts_timeseries.py
 64    """
 65
 66    # construct the object -----
 67
 68    def __init__(self, model="VAR", obj=None):
 69        if obj is not None:
 70            self.model = None
 71            self.obj = obj
 72        else:
 73            self.model = model
 74            if self.model == "VAR":
 75                self.obj = VAR
 76            elif self.model == "VECM":
 77                self.obj = VECM
 78            elif self.model == "ARIMA":
 79                self.obj = ARIMA
 80            elif self.model == "ETS":
 81                self.obj = ExponentialSmoothing
 82            elif self.model == "Theta":
 83                self.obj = ThetaModel
 84            else:
 85                raise ValueError("model not recognized")
 86        self.n_series = None
 87        self.replications = None
 88        self.mean_ = None
 89        self.upper_ = None
 90        self.lower_ = None
 91        self.output_dates_ = None
 92        self.alpha_ = None
 93        self.df_ = None
 94        self.residuals_ = []
 95        self.sims_ = None
 96        self.level_ = None
 97
 98    def fit(self, X, **kwargs):
 99        """Fit ClassicalMTS model to training data X, with optional regressors xreg
100
101        Parameters:
102
103        X: {array-like}, shape = [n_samples, n_features]
104            Training time series, where n_samples is the number
105            of samples and n_features is the number of features;
106            X must be in increasing order (most recent observations last)
107
108        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
109
110        Returns:
111
112        self: object
113        """
114
115        try:
116            self.n_series = X.shape[1]
117        except Exception:
118            self.n_series = 1
119
120        if (isinstance(X, pd.DataFrame) is False) and isinstance(
121            X, pd.Series
122        ) is False:  # input data set is a numpy array
123            X = pd.DataFrame(X)
124            if self.n_series > 1:
125                self.series_names = [
126                    "series" + str(i) for i in range(X.shape[1])
127                ]
128            else:
129                self.series_names = "series0"
130
131        else:  # input data set is a DataFrame or Series with column names
132            X_index = None
133            if X.index is not None and len(X.shape) > 1:
134                X_index = X.index
135                X = copy.deepcopy(mo.convert_df_to_numeric(X))
136            if X_index is not None:
137                try:
138                    X.index = X_index
139                except Exception:
140                    pass
141            if isinstance(X, pd.DataFrame):
142                self.series_names = X.columns.tolist()
143            else:
144                self.series_names = X.name
145
146        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
147            self.df_ = X
148            X = X.values
149            self.df_.columns = self.series_names
150            self.input_dates = ts.compute_input_dates(self.df_)
151        else:
152            self.df_ = pd.DataFrame(X, columns=self.series_names)
153
154        if self.model == "Theta":
155            try:
156                self.obj = self.obj(self.df_, **kwargs).fit()
157            except Exception as e:
158                self.obj = self.obj(self.df_.values, **kwargs).fit()
159            self.residuals_ = None
160        else:
161            self.obj = self.obj(X, **kwargs).fit()
162            try:
163                self.residuals_ = self.obj.resid
164            except Exception as e:  # Theta
165                self.residuals_ = None
166
167        return self
168
169    def predict(self, h=5, level=95, **kwargs):
170        """Forecast all the time series, h steps ahead
171
172        Parameters:
173
174        h: {integer}
175            Forecasting horizon
176
177        **kwargs: additional parameters to be passed to
178                self.cook_test_set
179
180        Returns:
181
182        model predictions for horizon = h: {array-like}
183
184        """
185
186        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
187        self.level_ = level
188        self.lower_ = None  # do not remove (/!\)
189        self.upper_ = None  # do not remove (/!\)
190        self.sims_ = None  # do not remove (/!\)
191        self.level_ = level
192        self.alpha_ = 100 - level
193
194        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
195
196        # Named tuple for forecast results
197        DescribeResult = namedtuple(
198            "DescribeResult", ("mean", "lower", "upper")
199        )
200
201        if (
202            self.obj is not None
203        ):  # try all the special cases of the else section (there's probably a better way)
204            try:
205                (
206                    mean_forecast,
207                    lower_bound,
208                    upper_bound,
209                ) = self.obj.forecast_interval(
210                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
211                )
212
213            except Exception as e:
214                try:
215                    forecast_result = self.obj.predict(steps=h)
216                    mean_forecast = forecast_result
217                    (
218                        lower_bound,
219                        upper_bound,
220                    ) = self._compute_confidence_intervals(
221                        forecast_result, alpha=self.alpha_ / 100, **kwargs
222                    )
223
224                except Exception as e:
225                    try:
226                        forecast_result = self.obj.get_forecast(steps=h)
227                        mean_forecast = forecast_result.predicted_mean
228                        lower_bound = forecast_result.conf_int()[:, 0]
229                        upper_bound = forecast_result.conf_int()[:, 1]
230
231                    except Exception as e:
232                        try:
233                            forecast_result = self.obj.forecast(steps=h)
234                            residuals = self.obj.resid
235                            std_errors = np.std(residuals)
236                            mean_forecast = forecast_result
237                            lower_bound = (
238                                forecast_result - pi_multiplier * std_errors
239                            )
240                            upper_bound = (
241                                forecast_result + pi_multiplier * std_errors
242                            )
243
244                        except Exception as e:
245                            try:
246                                mean_forecast = self.obj.forecast(
247                                    steps=h
248                                ).values
249                                forecast_result = self.obj.prediction_intervals(
250                                    steps=h, alpha=self.alpha_ / 100, **kwargs
251                                )
252                                lower_bound = forecast_result["lower"].values
253                                upper_bound = forecast_result["upper"].values
254                            except Exception:
255                                mean_forecast = self.obj.forecast(steps=h)
256                                forecast_result = self.obj.prediction_intervals(
257                                    steps=h, alpha=self.alpha_ / 100, **kwargs
258                                )
259                                lower_bound = forecast_result["lower"]
260                                upper_bound = forecast_result["upper"]
261
262        else:
263            if self.model == "VAR":
264                (
265                    mean_forecast,
266                    lower_bound,
267                    upper_bound,
268                ) = self.obj.forecast_interval(
269                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
270                )
271
272            elif self.model == "VECM":
273                forecast_result = self.obj.predict(steps=h)
274                mean_forecast = forecast_result
275                lower_bound, upper_bound = self._compute_confidence_intervals(
276                    forecast_result, alpha=self.alpha_ / 100, **kwargs
277                )
278
279            elif self.model == "ARIMA":
280                forecast_result = self.obj.get_forecast(steps=h)
281                mean_forecast = forecast_result.predicted_mean
282                lower_bound = forecast_result.conf_int()[:, 0]
283                upper_bound = forecast_result.conf_int()[:, 1]
284
285            elif self.model == "ETS":
286                forecast_result = self.obj.forecast(steps=h)
287                residuals = self.obj.resid
288                std_errors = np.std(residuals)
289                mean_forecast = forecast_result
290                lower_bound = forecast_result - pi_multiplier * std_errors
291                upper_bound = forecast_result + pi_multiplier * std_errors
292
293            elif self.model == "Theta":
294                try:
295                    mean_forecast = self.obj.forecast(steps=h).values
296                    forecast_result = self.obj.prediction_intervals(
297                        steps=h, alpha=self.alpha_ / 100, **kwargs
298                    )
299                    lower_bound = forecast_result["lower"].values
300                    upper_bound = forecast_result["upper"].values
301                except Exception:
302                    mean_forecast = self.obj.forecast(steps=h)
303                    forecast_result = self.obj.prediction_intervals(
304                        steps=h, alpha=self.alpha_ / 100, **kwargs
305                    )
306                    lower_bound = forecast_result["lower"]
307                    upper_bound = forecast_result["upper"]
308
309            else:
310                raise ValueError("model not recognized")
311
312        try:
313            self.mean_ = pd.DataFrame(
314                mean_forecast,
315                columns=self.series_names,
316                index=self.output_dates_,
317            )
318            self.lower_ = pd.DataFrame(
319                lower_bound, columns=self.series_names, index=self.output_dates_
320            )
321            self.upper_ = pd.DataFrame(
322                upper_bound, columns=self.series_names, index=self.output_dates_
323            )
324        except Exception:
325            self.mean_ = pd.Series(
326                mean_forecast, name=self.series_names, index=self.output_dates_
327            )
328            self.lower_ = pd.Series(
329                lower_bound, name=self.series_names, index=self.output_dates_
330            )
331            self.upper_ = pd.Series(
332                upper_bound, name=self.series_names, index=self.output_dates_
333            )
334
335        return DescribeResult(
336            mean=self.mean_, lower=self.lower_, upper=self.upper_
337        )
338
339    def _compute_confidence_intervals(self, forecast_result, alpha):
340        """
341        Compute confidence intervals for VECM forecasts.
342        Uses the covariance of residuals to approximate the confidence intervals.
343        """
344        residuals = self.obj.resid
345        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
346        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors
347
348        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
349        lower_bound = forecast_result - z_value * std_errors
350        upper_bound = forecast_result + z_value * std_errors
351
352        return lower_bound, upper_bound
353
354    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
355        """Train on training_index, score on testing_index."""
356
357        assert (
358            bool(set(training_index).intersection(set(testing_index))) == False
359        ), "Non-overlapping 'training_index' and 'testing_index' required"
360
361        # Dimensions
362        try:
363            # multivariate time series
364            n, p = X.shape
365        except:
366            # univariate time series
367            n = X.shape[0]
368            p = 1
369
370        # Training and testing sets
371        if p > 1:
372            X_train = X[training_index, :]
373            X_test = X[testing_index, :]
374        else:
375            X_train = X[training_index]
376            X_test = X[testing_index]
377
378        # Horizon
379        h = len(testing_index)
380        assert (
381            len(training_index) + h
382        ) <= n, "Please check lengths of training and testing windows"
383
384        # Fit and predict
385        self.fit(X_train, **kwargs)
386        preds = self.predict(h=h, **kwargs)
387
388        if scoring is None:
389            scoring = "neg_root_mean_squared_error"
390
391        # check inputs
392        assert scoring in (
393            "explained_variance",
394            "neg_mean_absolute_error",
395            "neg_mean_squared_error",
396            "neg_root_mean_squared_error",
397            "neg_mean_squared_log_error",
398            "neg_median_absolute_error",
399            "r2",
400        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
401                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
402                               'neg_median_absolute_error', 'r2')"
403
404        scoring_options = {
405            "explained_variance": skm2.explained_variance_score,
406            "neg_mean_absolute_error": skm2.mean_absolute_error,
407            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
408            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
409                np.mean((x - y) ** 2)
410            ),
411            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
412            "neg_median_absolute_error": skm2.median_absolute_error,
413            "r2": skm2.r2_score,
414        }
415
416        # if p > 1:
417        #     return tuple(
418        #         [
419        #             scoring_options[scoring](
420        #                 X_test[:, i], preds[:, i]#, **kwargs
421        #             )
422        #             for i in range(p)
423        #         ]
424        #     )
425        # else:
426        return scoring_options[scoring](X_test, preds)
427
428    def plot(self, series=None, type_axis="dates", type_plot="pi"):
429        """Plot time series forecast
430
431        Parameters:
432
433        series: {integer} or {string}
434            series index or name
435
436        """
437
438        assert all(
439            [
440                self.mean_ is not None,
441                self.lower_ is not None,
442                self.upper_ is not None,
443                self.output_dates_ is not None,
444            ]
445        ), "model forecasting must be obtained first (with predict)"
446
447        if series is None:
448            assert (
449                self.n_series == 1
450            ), "please specify series index or name (n_series > 1)"
451            series = 0
452
453        if isinstance(series, str):
454            assert (
455                series in self.series_names
456            ), f"series {series} doesn't exist in the input dataset"
457            series_idx = self.df_.columns.get_loc(series)
458        else:
459            assert isinstance(series, int) and (
460                0 <= series < self.n_series
461            ), f"check series index (< {self.n_series})"
462            series_idx = series
463
464        if isinstance(self.df_, pd.DataFrame):
465            y_all = list(self.df_.iloc[:, series_idx]) + list(
466                self.mean_.iloc[:, series_idx]
467            )
468            y_test = list(self.mean_.iloc[:, series_idx])
469        else:
470            y_all = list(self.df_.values) + list(self.mean_.values)
471            y_test = list(self.mean_.values)
472        n_points_all = len(y_all)
473        n_points_train = self.df_.shape[0]
474
475        if type_axis == "numeric":
476            x_all = [i for i in range(n_points_all)]
477            x_test = [i for i in range(n_points_train, n_points_all)]
478
479        if type_axis == "dates":  # use dates
480            x_all = np.concatenate(
481                (self.input_dates.values, self.output_dates_.values), axis=None
482            )
483            x_test = self.output_dates_.values
484
485        if type_plot == "pi":
486            fig, ax = plt.subplots()
487            ax.plot(x_all, y_all, "-")
488            ax.plot(x_test, y_test, "-", color="orange")
489            try:
490                ax.fill_between(
491                    x_test,
492                    self.lower_.iloc[:, series_idx],
493                    self.upper_.iloc[:, series_idx],
494                    alpha=0.2,
495                    color="orange",
496                )
497            except Exception:
498                ax.fill_between(
499                    x_test,
500                    self.lower_.values,
501                    self.upper_.values,
502                    alpha=0.2,
503                    color="orange",
504                )
505            if self.replications is None:
506                if self.n_series > 1:
507                    plt.title(
508                        f"prediction intervals for {series}",
509                        loc="left",
510                        fontsize=12,
511                        fontweight=0,
512                        color="black",
513                    )
514                else:
515                    plt.title(
516                        f"prediction intervals for input time series",
517                        loc="left",
518                        fontsize=12,
519                        fontweight=0,
520                        color="black",
521                    )
522                plt.show()
523            else:  # self.replications is not None
524                if self.n_series > 1:
525                    plt.title(
526                        f"prediction intervals for {self.replications} simulations of {series}",
527                        loc="left",
528                        fontsize=12,
529                        fontweight=0,
530                        color="black",
531                    )
532                else:
533                    plt.title(
534                        f"prediction intervals for {self.replications} simulations of input time series",
535                        loc="left",
536                        fontsize=12,
537                        fontweight=0,
538                        color="black",
539                    )
540                plt.show()
541
542        if type_plot == "spaghetti":
543            palette = plt.get_cmap("Set1")
544            sims_ix = getsims(self.sims_, series_idx)
545            plt.plot(x_all, y_all, "-")
546            for col_ix in range(
547                sims_ix.shape[1]
548            ):  # avoid this when there are thousands of simulations
549                plt.plot(
550                    x_test,
551                    sims_ix[:, col_ix],
552                    "-",
553                    color=palette(col_ix),
554                    linewidth=1,
555                    alpha=0.9,
556                )
557            plt.plot(x_all, y_all, "-", color="black")
558            plt.plot(x_test, y_test, "-", color="blue")
559            # Add titles
560            if self.n_series > 1:
561                plt.title(
562                    f"{self.replications} simulations of {series}",
563                    loc="left",
564                    fontsize=12,
565                    fontweight=0,
566                    color="black",
567                )
568            else:
569                plt.title(
570                    f"{self.replications} simulations of input time series",
571                    loc="left",
572                    fontsize=12,
573                    fontweight=0,
574                    color="black",
575                )
576            plt.xlabel("Time")
577            plt.ylabel("Values")
578            # Show the graph
579            plt.show()
580
581    def cross_val_score(
582        self,
583        X,
584        scoring="root_mean_squared_error",
585        n_jobs=None,
586        verbose=0,
587        xreg=None,
588        initial_window=5,
589        horizon=3,
590        fixed_window=False,
591        show_progress=True,
592        level=95,
593        **kwargs,
594    ):
595        """Evaluate a score by time series cross-validation.
596
597        Parameters:
598
599            X: {array-like, sparse matrix} of shape (n_samples, n_features)
600                The data to fit.
601
602            scoring: str or a function
603                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
604                'mean_absolute_error', 'mean_error', 'mean_percentage_error',
605                'mean_absolute_percentage_error',  'winkler_score', 'coverage')
606                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`
607
608            n_jobs: int, default=None
609                Number of jobs to run in parallel.
610
611            verbose: int, default=0
612                The verbosity level.
613
614            xreg: array-like, optional (default=None)
615                Additional (external) regressors to be passed to `fit`
616                xreg must be in 'increasing' order (most recent observations last)
617
618            initial_window: int
619                initial number of consecutive values in each training set sample
620
621            horizon: int
622                number of consecutive values in test set sample
623
624            fixed_window: boolean
625                if False, all training samples start at index 0, and the training
626                window's size is increasing.
627                if True, the training window's size is fixed, and the window is
628                rolling forward
629
630            show_progress: boolean
631                if True, a progress bar is printed
632
633            **kwargs: dict
634                additional parameters to be passed to `fit` and `predict`
635
636        Returns:
637
638            A tuple: descriptive statistics or errors and raw errors
639
640        """
641        tscv = TimeSeriesSplit()
642
643        tscv_obj = tscv.split(
644            X,
645            initial_window=initial_window,
646            horizon=horizon,
647            fixed_window=fixed_window,
648        )
649
650        if isinstance(scoring, str):
651            assert scoring in (
652                "root_mean_squared_error",
653                "mean_squared_error",
654                "mean_error",
655                "mean_absolute_error",
656                "mean_percentage_error",
657                "mean_absolute_percentage_error",
658                "winkler_score",
659                "coverage",
660            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error',  'winkler_score', 'coverage')"
661
662            def err_func(X_test, X_pred, scoring):
663                if (self.replications is not None) or (
664                    self.type_pi == "gaussian"
665                ):  # probabilistic
666                    if scoring == "winkler_score":
667                        return winkler_score(X_pred, X_test, level=level)
668                    elif scoring == "coverage":
669                        return coverage(X_pred, X_test, level=level)
670                    else:
671                        return mean_errors(
672                            pred=X_pred.mean, actual=X_test, scoring=scoring
673                        )
674                else:  # not probabilistic
675                    return mean_errors(
676                        pred=X_pred, actual=X_test, scoring=scoring
677                    )
678
679        else:  # isinstance(scoring, str) = False
680            err_func = scoring
681
682        errors = []
683
684        train_indices = []
685
686        test_indices = []
687
688        for train_index, test_index in tscv_obj:
689            train_indices.append(train_index)
690            test_indices.append(test_index)
691
692        if show_progress is True:
693            iterator = tqdm(
694                zip(train_indices, test_indices), total=len(train_indices)
695            )
696        else:
697            iterator = zip(train_indices, test_indices)
698
699        for train_index, test_index in iterator:
700            if verbose == 1:
701                print(f"TRAIN: {train_index}")
702                print(f"TEST: {test_index}")
703
704            if isinstance(X, pd.DataFrame):
705                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
706                X_test = X.iloc[test_index, :]
707            else:
708                self.fit(X[train_index, :], xreg=xreg, **kwargs)
709                X_test = X[test_index, :]
710            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
711
712            errors.append(err_func(X_test, X_pred, scoring))
713
714        res = np.asarray(errors)
715
716        return res, describe(res)

Time series with statistical models (statsmodels), mostly for benchmarks

Parameters:

model: type of model: str.
    currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
    Default is 'VAR'

obj: object
    A time series model from statsmodels

Attributes:

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

level_: int
    level of confidence for prediction intervals (default is 95)

Examples: See examples/classical_mts_timeseries.py

def fit(self, X, **kwargs):
 98    def fit(self, X, **kwargs):
 99        """Fit ClassicalMTS model to training data X, with optional regressors xreg
100
101        Parameters:
102
103        X: {array-like}, shape = [n_samples, n_features]
104            Training time series, where n_samples is the number
105            of samples and n_features is the number of features;
106            X must be in increasing order (most recent observations last)
107
108        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
109
110        Returns:
111
112        self: object
113        """
114
115        try:
116            self.n_series = X.shape[1]
117        except Exception:
118            self.n_series = 1
119
120        if (isinstance(X, pd.DataFrame) is False) and isinstance(
121            X, pd.Series
122        ) is False:  # input data set is a numpy array
123            X = pd.DataFrame(X)
124            if self.n_series > 1:
125                self.series_names = [
126                    "series" + str(i) for i in range(X.shape[1])
127                ]
128            else:
129                self.series_names = "series0"
130
131        else:  # input data set is a DataFrame or Series with column names
132            X_index = None
133            if X.index is not None and len(X.shape) > 1:
134                X_index = X.index
135                X = copy.deepcopy(mo.convert_df_to_numeric(X))
136            if X_index is not None:
137                try:
138                    X.index = X_index
139                except Exception:
140                    pass
141            if isinstance(X, pd.DataFrame):
142                self.series_names = X.columns.tolist()
143            else:
144                self.series_names = X.name
145
146        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
147            self.df_ = X
148            X = X.values
149            self.df_.columns = self.series_names
150            self.input_dates = ts.compute_input_dates(self.df_)
151        else:
152            self.df_ = pd.DataFrame(X, columns=self.series_names)
153
154        if self.model == "Theta":
155            try:
156                self.obj = self.obj(self.df_, **kwargs).fit()
157            except Exception as e:
158                self.obj = self.obj(self.df_.values, **kwargs).fit()
159            self.residuals_ = None
160        else:
161            self.obj = self.obj(X, **kwargs).fit()
162            try:
163                self.residuals_ = self.obj.resid
164            except Exception as e:  # Theta
165                self.residuals_ = None
166
167        return self

Fit ClassicalMTS model to training data X

Parameters:

X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

**kwargs: additional parameters to be passed to the constructor of the underlying model (`self.obj`)

Returns:

self: object

def predict(self, h=5, level=95, **kwargs):
169    def predict(self, h=5, level=95, **kwargs):
170        """Forecast all the time series, h steps ahead
171
172        Parameters:
173
174        h: {integer}
175            Forecasting horizon
176
177        **kwargs: additional parameters to be passed to
178                self.cook_test_set
179
180        Returns:
181
182        model predictions for horizon = h: {array-like}
183
184        """
185
186        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
187        self.level_ = level
188        self.lower_ = None  # do not remove (/!\)
189        self.upper_ = None  # do not remove (/!\)
190        self.sims_ = None  # do not remove (/!\)
191        self.level_ = level
192        self.alpha_ = 100 - level
193
194        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
195
196        # Named tuple for forecast results
197        DescribeResult = namedtuple(
198            "DescribeResult", ("mean", "lower", "upper")
199        )
200
201        if (
202            self.obj is not None
203        ):  # try all the special cases of the else section (there's probably a better way)
204            try:
205                (
206                    mean_forecast,
207                    lower_bound,
208                    upper_bound,
209                ) = self.obj.forecast_interval(
210                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
211                )
212
213            except Exception as e:
214                try:
215                    forecast_result = self.obj.predict(steps=h)
216                    mean_forecast = forecast_result
217                    (
218                        lower_bound,
219                        upper_bound,
220                    ) = self._compute_confidence_intervals(
221                        forecast_result, alpha=self.alpha_ / 100, **kwargs
222                    )
223
224                except Exception as e:
225                    try:
226                        forecast_result = self.obj.get_forecast(steps=h)
227                        mean_forecast = forecast_result.predicted_mean
228                        lower_bound = forecast_result.conf_int()[:, 0]
229                        upper_bound = forecast_result.conf_int()[:, 1]
230
231                    except Exception as e:
232                        try:
233                            forecast_result = self.obj.forecast(steps=h)
234                            residuals = self.obj.resid
235                            std_errors = np.std(residuals)
236                            mean_forecast = forecast_result
237                            lower_bound = (
238                                forecast_result - pi_multiplier * std_errors
239                            )
240                            upper_bound = (
241                                forecast_result + pi_multiplier * std_errors
242                            )
243
244                        except Exception as e:
245                            try:
246                                mean_forecast = self.obj.forecast(
247                                    steps=h
248                                ).values
249                                forecast_result = self.obj.prediction_intervals(
250                                    steps=h, alpha=self.alpha_ / 100, **kwargs
251                                )
252                                lower_bound = forecast_result["lower"].values
253                                upper_bound = forecast_result["upper"].values
254                            except Exception:
255                                mean_forecast = self.obj.forecast(steps=h)
256                                forecast_result = self.obj.prediction_intervals(
257                                    steps=h, alpha=self.alpha_ / 100, **kwargs
258                                )
259                                lower_bound = forecast_result["lower"]
260                                upper_bound = forecast_result["upper"]
261
262        else:
263            if self.model == "VAR":
264                (
265                    mean_forecast,
266                    lower_bound,
267                    upper_bound,
268                ) = self.obj.forecast_interval(
269                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
270                )
271
272            elif self.model == "VECM":
273                forecast_result = self.obj.predict(steps=h)
274                mean_forecast = forecast_result
275                lower_bound, upper_bound = self._compute_confidence_intervals(
276                    forecast_result, alpha=self.alpha_ / 100, **kwargs
277                )
278
279            elif self.model == "ARIMA":
280                forecast_result = self.obj.get_forecast(steps=h)
281                mean_forecast = forecast_result.predicted_mean
282                lower_bound = forecast_result.conf_int()[:, 0]
283                upper_bound = forecast_result.conf_int()[:, 1]
284
285            elif self.model == "ETS":
286                forecast_result = self.obj.forecast(steps=h)
287                residuals = self.obj.resid
288                std_errors = np.std(residuals)
289                mean_forecast = forecast_result
290                lower_bound = forecast_result - pi_multiplier * std_errors
291                upper_bound = forecast_result + pi_multiplier * std_errors
292
293            elif self.model == "Theta":
294                try:
295                    mean_forecast = self.obj.forecast(steps=h).values
296                    forecast_result = self.obj.prediction_intervals(
297                        steps=h, alpha=self.alpha_ / 100, **kwargs
298                    )
299                    lower_bound = forecast_result["lower"].values
300                    upper_bound = forecast_result["upper"].values
301                except Exception:
302                    mean_forecast = self.obj.forecast(steps=h)
303                    forecast_result = self.obj.prediction_intervals(
304                        steps=h, alpha=self.alpha_ / 100, **kwargs
305                    )
306                    lower_bound = forecast_result["lower"]
307                    upper_bound = forecast_result["upper"]
308
309            else:
310                raise ValueError("model not recognized")
311
312        try:
313            self.mean_ = pd.DataFrame(
314                mean_forecast,
315                columns=self.series_names,
316                index=self.output_dates_,
317            )
318            self.lower_ = pd.DataFrame(
319                lower_bound, columns=self.series_names, index=self.output_dates_
320            )
321            self.upper_ = pd.DataFrame(
322                upper_bound, columns=self.series_names, index=self.output_dates_
323            )
324        except Exception:
325            self.mean_ = pd.Series(
326                mean_forecast, name=self.series_names, index=self.output_dates_
327            )
328            self.lower_ = pd.Series(
329                lower_bound, name=self.series_names, index=self.output_dates_
330            )
331            self.upper_ = pd.Series(
332                upper_bound, name=self.series_names, index=self.output_dates_
333            )
334
335        return DescribeResult(
336            mean=self.mean_, lower=self.lower_, upper=self.upper_
337        )

Forecast all the time series, h steps ahead

Parameters:

h: {integer} Forecasting horizon

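**kwargs: additional parameters to be passed to the fitted model's interval method (`forecast_interval`, `prediction_intervals`) or to `self._compute_confidence_intervals`, depending on the model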

Returns:

model predictions for horizon = h: a `DescribeResult` namedtuple with fields `mean`, `lower` and `upper`

def score(self, X, training_index, testing_index, scoring=None, **kwargs):
354    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
355        """Train on training_index, score on testing_index."""
356
357        assert (
358            bool(set(training_index).intersection(set(testing_index))) == False
359        ), "Non-overlapping 'training_index' and 'testing_index' required"
360
361        # Dimensions
362        try:
363            # multivariate time series
364            n, p = X.shape
365        except:
366            # univariate time series
367            n = X.shape[0]
368            p = 1
369
370        # Training and testing sets
371        if p > 1:
372            X_train = X[training_index, :]
373            X_test = X[testing_index, :]
374        else:
375            X_train = X[training_index]
376            X_test = X[testing_index]
377
378        # Horizon
379        h = len(testing_index)
380        assert (
381            len(training_index) + h
382        ) <= n, "Please check lengths of training and testing windows"
383
384        # Fit and predict
385        self.fit(X_train, **kwargs)
386        preds = self.predict(h=h, **kwargs)
387
388        if scoring is None:
389            scoring = "neg_root_mean_squared_error"
390
391        # check inputs
392        assert scoring in (
393            "explained_variance",
394            "neg_mean_absolute_error",
395            "neg_mean_squared_error",
396            "neg_root_mean_squared_error",
397            "neg_mean_squared_log_error",
398            "neg_median_absolute_error",
399            "r2",
400        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
401                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
402                               'neg_median_absolute_error', 'r2')"
403
404        scoring_options = {
405            "explained_variance": skm2.explained_variance_score,
406            "neg_mean_absolute_error": skm2.mean_absolute_error,
407            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
408            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
409                np.mean((x - y) ** 2)
410            ),
411            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
412            "neg_median_absolute_error": skm2.median_absolute_error,
413            "r2": skm2.r2_score,
414        }
415
416        # if p > 1:
417        #     return tuple(
418        #         [
419        #             scoring_options[scoring](
420        #                 X_test[:, i], preds[:, i]#, **kwargs
421        #             )
422        #             for i in range(p)
423        #         ]
424        #     )
425        # else:
426        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.

class CustomClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 16class CustomClassifier(Custom, ClassifierMixin):
 17    """Custom Classification model
 18
 19    Attributes:
 20
 21        obj: object
 22            any object containing a method fit (obj.fit()) and a method predict
 23            (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model''s
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        cv_calibration: int, cross-validation generator, or iterable, default=2
 74            Determines the cross-validation splitting strategy. Same as
 75            `sklearn.calibration.CalibratedClassifierCV`
 76
 77        calibration_method: str
 78            {‘sigmoid’, ‘isotonic’}, default=’sigmoid’
 79            The method to use for calibration. Same as
 80            `sklearn.calibration.CalibratedClassifierCV`
 81
 82        seed: int
 83            reproducibility seed for nodes_sim=='uniform'
 84
 85        backend: str
 86            "cpu" or "gpu" or "tpu"
 87
 88    Examples:
 89
 90    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly
 91
 92    ```python
 93    import nnetsauce as ns
 94    from sklearn.ensemble import RandomForestClassifier
 95    from sklearn.model_selection import train_test_split
 96    from sklearn.datasets import load_digits
 97    from time import time
 98
 99    digits = load_digits()
100    X = digits.data
101    y = digits.target
102    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
103                                                        random_state=123)
104
105    # layer 1 (base layer) ----
106    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
107
108    start = time()
109
110    layer1_regr.fit(X_train, y_train)
111
112    # Accuracy in layer 1
113    print(layer1_regr.score(X_test, y_test))
114
115    # layer 2 using layer 1 ----
116    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
117                            direct_link=True, bias=True,
118                            nodes_sim='uniform', activation_name='relu',
119                            n_clusters=2, seed=123)
120    layer2_regr.fit(X_train, y_train)
121
122    # Accuracy in layer 2
123    print(layer2_regr.score(X_test, y_test))
124
125    # layer 3 using layer 2 ----
126    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
127                            direct_link=True, bias=True, dropout=0.7,
128                            nodes_sim='uniform', activation_name='relu',
129                            n_clusters=2, seed=123)
130    layer3_regr.fit(X_train, y_train)
131
132    # Accuracy in layer 3
133    print(layer3_regr.score(X_test, y_test))
134
135    print(f"Elapsed {time() - start}")
136    ```
137
138    """
139
140    # construct the object -----
141    _estimator_type = "classifier"
142
143    def __init__(
144        self,
145        obj,
146        n_hidden_features=5,
147        activation_name="relu",
148        a=0.01,
149        nodes_sim="sobol",
150        bias=True,
151        dropout=0,
152        direct_link=True,
153        n_clusters=2,
154        cluster_encode=True,
155        type_clust="kmeans",
156        type_scaling=("std", "std", "std"),
157        col_sample=1,
158        row_sample=1,
159        cv_calibration=2,
160        calibration_method="sigmoid",
161        seed=123,
162        backend="cpu",
163    ):
164        super().__init__(
165            obj=obj,
166            n_hidden_features=n_hidden_features,
167            activation_name=activation_name,
168            a=a,
169            nodes_sim=nodes_sim,
170            bias=bias,
171            dropout=dropout,
172            direct_link=direct_link,
173            n_clusters=n_clusters,
174            cluster_encode=cluster_encode,
175            type_clust=type_clust,
176            type_scaling=type_scaling,
177            col_sample=col_sample,
178            row_sample=row_sample,
179            seed=seed,
180            backend=backend,
181        )
182        self.coef_ = None
183        self.intercept_ = None
184        self.type_fit = "classification"
185        self.cv_calibration = cv_calibration
186        self.calibration_method = calibration_method
187
188    def __sklearn_clone__(self):
189        """Create a clone of the estimator.
190
191        This is required for scikit-learn's calibration system to work properly.
192        """
193        # Create a new instance with the same parameters
194        clone = CustomClassifier(
195            obj=self.obj,
196            n_hidden_features=self.n_hidden_features,
197            activation_name=self.activation_name,
198            a=self.a,
199            nodes_sim=self.nodes_sim,
200            bias=self.bias,
201            dropout=self.dropout,
202            direct_link=self.direct_link,
203            n_clusters=self.n_clusters,
204            cluster_encode=self.cluster_encode,
205            type_clust=self.type_clust,
206            type_scaling=self.type_scaling,
207            col_sample=self.col_sample,
208            row_sample=self.row_sample,
209            cv_calibration=self.cv_calibration,
210            calibration_method=self.calibration_method,
211            seed=self.seed,
212            backend=self.backend,
213        )
214        return clone
215
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, cv=self.cv_calibration, method=self.calibration_method
253            )
254
255        # if sample_weights, else: (must use self.row_index)
256        if sample_weight is not None:
257            self.obj.fit(
258                scaled_Z,
259                output_y,
260                sample_weight=sample_weight[self.index_row_].ravel(),
261                **kwargs
262            )
263            return self
264
265        # if sample_weight is None:
266        self.obj.fit(scaled_Z, output_y, **kwargs)
267        self.classes_ = np.unique(y)  # for compatibility with sklearn
268        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
269
270        if hasattr(self.obj, "coef_"):
271            self.coef_ = self.obj.coef_
272
273        if hasattr(self.obj, "intercept_"):
274            self.intercept_ = self.obj.intercept_
275
276        return self
277
278    def partial_fit(self, X, y, sample_weight=None, **kwargs):
279        """Partial fit custom model to training data (X, y).
280
281        Parameters:
282
283            X: {array-like}, shape = [n_samples, n_features]
284                Subset of training vectors, where n_samples is the number
285                of samples and n_features is the number of features.
286
287            y: array-like, shape = [n_samples]
288                Subset of target values.
289
290            sample_weight: array-like, shape = [n_samples]
291                Sample weights.
292
293            **kwargs: additional parameters to be passed to
294                        self.cook_training_set or self.obj.fit
295
296        Returns:
297
298            self: object
299        """
300
301        if len(X.shape) == 1:
302            if isinstance(X, pd.DataFrame):
303                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
304            else:
305                X = X.reshape(1, -1)
306            y = np.array([y], dtype=int)
307
308        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
309        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
310
311        # if sample_weights, else: (must use self.row_index)
312        if sample_weight is not None:
313            try:
314                self.obj.partial_fit(
315                    scaled_Z,
316                    output_y,
317                    sample_weight=sample_weight[self.index_row_].ravel(),
318                    # **kwargs
319                )
320            except:
321                NotImplementedError
322
323            return self
324
325        # if sample_weight is None:
326        # try:
327        self.obj.partial_fit(scaled_Z, output_y)
328        # except:
329        #    raise NotImplementedError
330
331        self.classes_ = np.unique(y)  # for compatibility with sklearn
332        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
333
334        return self
335
336    def predict(self, X, **kwargs):
337        """Predict test data X.
338
339        Parameters:
340
341            X: {array-like}, shape = [n_samples, n_features]
342                Training vectors, where n_samples is the number
343                of samples and n_features is the number of features.
344
345            **kwargs: additional parameters to be passed to
346                    self.cook_test_set
347
348        Returns:
349
350            model predictions: {array-like}
351        """
352
353        if len(X.shape) == 1:
354            n_features = X.shape[0]
355            new_X = mo.rbind(
356                X.reshape(1, n_features),
357                np.ones(n_features).reshape(1, n_features),
358            )
359
360            return (
361                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
362            )[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
365
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(
391                    self.cook_test_set(new_X, **kwargs), **kwargs
392                )
393            )[0]
394        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
395
396    def decision_function(self, X, **kwargs):
397        """Compute the decision function of X.
398
399        Parameters:
400            X: {array-like}, shape = [n_samples, n_features]
401                Samples to compute decision function for.
402
403            **kwargs: additional parameters to be passed to
404                    self.cook_test_set
405
406        Returns:
407            array-like of shape (n_samples,) or (n_samples, n_classes)
408            Decision function of the input samples. The order of outputs is the same
409            as that of the classes passed to fit.
410        """
411        if not hasattr(self.obj, "decision_function"):
412            # If base classifier doesn't have decision_function, use predict_proba
413            proba = self.predict_proba(X, **kwargs)
414            if proba.shape[1] == 2:
415                return proba[:, 1]  # For binary classification
416            return proba  # For multiclass
417
418        if len(X.shape) == 1:
419            n_features = X.shape[0]
420            new_X = mo.rbind(
421                X.reshape(1, n_features),
422                np.ones(n_features).reshape(1, n_features),
423            )
424
425            return (
426                self.obj.decision_function(
427                    self.cook_test_set(new_X, **kwargs), **kwargs
428                )
429            )[0]
430
431        return self.obj.decision_function(
432            self.cook_test_set(X, **kwargs), **kwargs
433        )
434
435    def score(self, X, y, scoring=None):
436        """Scoring function for classification.
437
438        Args:
439
440            X: {array-like}, shape = [n_samples, n_features]
441                Training vectors, where n_samples is the number
442                of samples and n_features is the number of features.
443
444            y: array-like, shape = [n_samples]
445                Target values.
446
447            scoring: str
448                scoring method (default is accuracy)
449
450        Returns:
451
452            score: float
453        """
454
455        if scoring is None:
456            scoring = "accuracy"
457
458        if scoring == "accuracy":
459            return skm2.accuracy_score(y, self.predict(X))
460
461        if scoring == "f1":
462            return skm2.f1_score(y, self.predict(X))
463
464        if scoring == "precision":
465            return skm2.precision_score(y, self.predict(X))
466
467        if scoring == "recall":
468            return skm2.recall_score(y, self.predict(X))
469
470        if scoring == "roc_auc":
471            return skm2.roc_auc_score(y, self.predict(X))
472
473        if scoring == "log_loss":
474            return skm2.log_loss(y, self.predict_proba(X))
475
476        if scoring == "balanced_accuracy":
477            return skm2.balanced_accuracy_score(y, self.predict(X))
478
479        if scoring == "average_precision":
480            return skm2.average_precision_score(y, self.predict(X))
481
482        if scoring == "neg_brier_score":
483            return -skm2.brier_score_loss(y, self.predict_proba(X))
484
485        if scoring == "neg_log_loss":
486            return -skm2.log_loss(y, self.predict_proba(X))
487
488    @property
489    def _estimator_type(self):
490        return "classifier"

Custom Classification model

Attributes:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

cv_calibration: int, cross-validation generator, or iterable, default=2
    Determines the cross-validation splitting strategy. Same as
    `sklearn.calibration.CalibratedClassifierCV`

calibration_method: str
    {‘sigmoid’, ‘isotonic’}, default=’sigmoid’
    The method to use for calibration. Same as
    `sklearn.calibration.CalibratedClassifierCV`

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly

import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time

digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=123)

# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

start = time()

layer1_regr.fit(X_train, y_train)

# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))

# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                        direct_link=True, bias=True,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)

# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))

# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                        direct_link=True, bias=True, dropout=0.7,
                        nodes_sim='uniform', activation_name='relu',
                        n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)

# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))

print(f"Elapsed {time() - start}")
def fit(self, X, y, sample_weight=None, **kwargs):
216    def fit(self, X, y, sample_weight=None, **kwargs):
217        """Fit custom model to training data (X, y).
218
219        Parameters:
220
221            X: {array-like}, shape = [n_samples, n_features]
222                Training vectors, where n_samples is the number
223                of samples and n_features is the number of features.
224
225            y: array-like, shape = [n_samples]
226                Target values.
227
228            sample_weight: array-like, shape = [n_samples]
229                Sample weights.
230
231            **kwargs: additional parameters to be passed to
232                        self.cook_training_set or self.obj.fit
233
234        Returns:
235
236            self: object
237        """
238
239        if len(X.shape) == 1:
240            if isinstance(X, pd.DataFrame):
241                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
242            else:
243                X = X.reshape(1, -1)
244
245        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
246        self.classes_ = np.unique(y)
247        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
248
249        # Wrap in CalibratedClassifierCV if needed
250        if self.cv_calibration is not None:
251            self.obj = CalibratedClassifierCV(
252                self.obj, cv=self.cv_calibration, method=self.calibration_method
253            )
254
255        # if sample_weights, else: (must use self.row_index)
256        if sample_weight is not None:
257            self.obj.fit(
258                scaled_Z,
259                output_y,
260                sample_weight=sample_weight[self.index_row_].ravel(),
261                **kwargs
262            )
263            return self
264
265        # if sample_weight is None:
266        self.obj.fit(scaled_Z, output_y, **kwargs)
267        self.classes_ = np.unique(y)  # for compatibility with sklearn
268        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
269
270        if hasattr(self.obj, "coef_"):
271            self.coef_ = self.obj.coef_
272
273        if hasattr(self.obj, "intercept_"):
274            self.intercept_ = self.obj.intercept_
275
276        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
336    def predict(self, X, **kwargs):
337        """Predict test data X.
338
339        Parameters:
340
341            X: {array-like}, shape = [n_samples, n_features]
342                Training vectors, where n_samples is the number
343                of samples and n_features is the number of features.
344
345            **kwargs: additional parameters to be passed to
346                    self.cook_test_set
347
348        Returns:
349
350            model predictions: {array-like}
351        """
352
353        if len(X.shape) == 1:
354            n_features = X.shape[0]
355            new_X = mo.rbind(
356                X.reshape(1, n_features),
357                np.ones(n_features).reshape(1, n_features),
358            )
359
360            return (
361                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
362            )[0]
363
364        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
366    def predict_proba(self, X, **kwargs):
367        """Predict probabilities for test data X.
368
369        Args:
370
371            X: {array-like}, shape = [n_samples, n_features]
372                Training vectors, where n_samples is the number
373                of samples and n_features is the number of features.
374
375            **kwargs: additional parameters to be passed to
376                    self.cook_test_set
377
378        Returns:
379
380            probability estimates for test data: {array-like}
381        """
382
383        if len(X.shape) == 1:
384            n_features = X.shape[0]
385            new_X = mo.rbind(
386                X.reshape(1, n_features),
387                np.ones(n_features).reshape(1, n_features),
388            )
389            return (
390                self.obj.predict_proba(
391                    self.cook_test_set(new_X, **kwargs), **kwargs
392                )
393            )[0]
394        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
435    def score(self, X, y, scoring=None):
436        """Scoring function for classification.
437
438        Args:
439
440            X: {array-like}, shape = [n_samples, n_features]
441                Training vectors, where n_samples is the number
442                of samples and n_features is the number of features.
443
444            y: array-like, shape = [n_samples]
445                Target values.
446
447            scoring: str
448                scoring method (default is accuracy)
449
450        Returns:
451
452            score: float
453        """
454
455        if scoring is None:
456            scoring = "accuracy"
457
458        if scoring == "accuracy":
459            return skm2.accuracy_score(y, self.predict(X))
460
461        if scoring == "f1":
462            return skm2.f1_score(y, self.predict(X))
463
464        if scoring == "precision":
465            return skm2.precision_score(y, self.predict(X))
466
467        if scoring == "recall":
468            return skm2.recall_score(y, self.predict(X))
469
470        if scoring == "roc_auc":
471            return skm2.roc_auc_score(y, self.predict(X))
472
473        if scoring == "log_loss":
474            return skm2.log_loss(y, self.predict_proba(X))
475
476        if scoring == "balanced_accuracy":
477            return skm2.balanced_accuracy_score(y, self.predict(X))
478
479        if scoring == "average_precision":
480            return skm2.average_precision_score(y, self.predict(X))
481
482        if scoring == "neg_brier_score":
483            return -skm2.brier_score_loss(y, self.predict_proba(X))
484
485        if scoring == "neg_log_loss":
486            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class CustomRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 18class CustomRegressor(Custom, RegressorMixin):
 19    """Custom Regression model
 20
 21    This class is used to 'augment' any regression model with transformed features.
 22
 23    Parameters:
 24
 25        obj: object
 26            any object containing a method fit (obj.fit()) and a method predict
 27            (obj.predict())
 28
 29        n_hidden_features: int
 30            number of nodes in the hidden layer
 31
 32        activation_name: str
 33            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 34
 35        a: float
 36            hyperparameter for 'prelu' or 'elu' activation function
 37
 38        nodes_sim: str
 39            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 40            'uniform'
 41
 42        bias: boolean
 43            indicates if the hidden layer contains a bias term (True) or not
 44            (False)
 45
 46        dropout: float
 47            regularization parameter; (random) percentage of nodes dropped out
 48            of the training
 49
 50        direct_link: boolean
 51            indicates if the original predictors are included (True) in model's
 52            fitting or not (False)
 53
 54        n_clusters: int
 55            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 56                no clustering)
 57
 58        cluster_encode: bool
 59            defines how the variable containing clusters is treated (default is one-hot)
 60            if `False`, then labels are used, without one-hot encoding
 61
 62        type_clust: str
 63            type of clustering method: currently k-means ('kmeans') or Gaussian
 64            Mixture Model ('gmm')
 65
 66        type_scaling: a tuple of 3 strings
 67            scaling methods for inputs, hidden layer, and clustering respectively
 68            (and when relevant).
 69            Currently available: standardization ('std') or MinMax scaling ('minmax')
 70
 71        type_pi: str.
 72            type of prediction interval; currently `None` (split or local
 73            conformal without simulation), "kde" or "bootstrap" (simulated split
 74            conformal).
 75
 76        replications: int.
 77            number of replications (if needed) for predictive simulation.
 78            Used only in `self.predict`, for `self.kernel` in ('gaussian',
 79            'tophat') and `self.type_pi = 'kde'`. Default is `None`.
 80
 81        kernel: str.
 82            the kernel to use for kernel density estimation (used for predictive
 83            simulation in `self.predict`, with `method='splitconformal'` and
 84            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
 85
 86        type_split: str.
 87            Type of splitting for conformal prediction. None (default), or
 88            "random" (random split of data) or "sequential" (sequential split of data)
 89
 90        col_sample: float
 91            percentage of covariates randomly chosen for training
 92
 93        row_sample: float
 94            percentage of rows chosen for training, by stratified bootstrapping
 95
 96        level: float
 97            confidence level for prediction intervals
 98
 99        pi_method: str
100            method for prediction intervals: 'splitconformal' or 'localconformal'
101
102        seed: int
103            reproducibility seed for nodes_sim=='uniform'
104
105        type_fit: str
106            'regression'
107
108        backend: str
109            "cpu" or "gpu" or "tpu"
110
111    Examples:
112
113    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)
114
115    """
116
117    # construct the object -----
118
119    def __init__(
120        self,
121        obj,
122        n_hidden_features=5,
123        activation_name="relu",
124        a=0.01,
125        nodes_sim="sobol",
126        bias=True,
127        dropout=0,
128        direct_link=True,
129        n_clusters=2,
130        cluster_encode=True,
131        type_clust="kmeans",
132        type_scaling=("std", "std", "std"),
133        type_pi=None,
134        replications=None,
135        kernel=None,
136        type_split=None,
137        col_sample=1,
138        row_sample=1,
139        level=None,
140        pi_method=None,
141        seed=123,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_hidden_features=n_hidden_features,
147            activation_name=activation_name,
148            a=a,
149            nodes_sim=nodes_sim,
150            bias=bias,
151            dropout=dropout,
152            direct_link=direct_link,
153            n_clusters=n_clusters,
154            cluster_encode=cluster_encode,
155            type_clust=type_clust,
156            type_scaling=type_scaling,
157            col_sample=col_sample,
158            row_sample=row_sample,
159            seed=seed,
160            backend=backend,
161        )
162
163        self.type_fit = "regression"
164        self.type_pi = type_pi
165        self.replications = replications
166        self.kernel = kernel
167        self.type_split = type_split
168        self.level = level
169        self.pi_method = pi_method
170        self.coef_ = None
171        self.intercept_ = None
172        self.X_ = None
173        self.y_ = None
174        self.aic_ = None
175        self.aicc_ = None
176        self.bic_ = None
177
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229
230        # Get number of parameters
231        n_params = (
232            self.n_hidden_features + X.shape[1]
233        )  # hidden features + original features
234        if self.n_clusters > 0:
235            n_params += self.n_clusters  # add clusters if used
236
237        # Compute information criteria
238        n_samples = X.shape[0]
239        temp = n_samples * np.log(self.sse_ / n_samples)
240        self.aic_ = temp + 2 * n_params
241        self.bic_ = temp + np.log(n_samples) * n_params
242
243        if hasattr(self.obj, "coef_"):
244            self.coef_ = self.obj.coef_
245
246        if hasattr(self.obj, "intercept_"):
247            self.intercept_ = self.obj.intercept_
248
249        return self
250
251    def partial_fit(self, X, y, **kwargs):
252        """Partial fit custom model to training data (X, y).
253
254        Parameters:
255
256            X: {array-like}, shape = [n_samples, n_features]
257                Subset of training vectors, where n_samples is the number
258                of samples and n_features is the number of features.
259
260            y: array-like, shape = [n_samples]
261                Subset of target values.
262
263            **kwargs: additional parameters to be passed to
264                self.cook_training_set or self.obj.fit
265
266        Returns:
267
268            self: object
269
270        """
271
272        if len(X.shape) == 1:
273            if isinstance(X, pd.DataFrame):
274                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
275            else:
276                X = X.reshape(1, -1)
277            y = np.array([y])
278
279        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
280
281        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)
282
283        self.X_ = X
284
285        self.y_ = y
286
287        return self
288
289    def predict(self, X, level=95, method="splitconformal", **kwargs):
290        """Predict test data X.
291
292        Parameters:
293
294            X: {array-like}, shape = [n_samples, n_features]
295                Training vectors, where n_samples is the number
296                of samples and n_features is the number of features.
297
298            level: int
299                Level of confidence (default = 95)
300
301            method: str
302                'splitconformal', 'localconformal'
303                prediction (if you specify `return_pi = True`)
304
305            **kwargs: additional parameters
306                    `return_pi = True` for conformal prediction,
307                    with `method` in ('splitconformal', 'localconformal')
308                    or `return_std = True` for `self.obj` in
309                    (`sklearn.linear_model.BayesianRidge`,
310                    `sklearn.linear_model.ARDRegressor`,
311                    `sklearn.gaussian_process.GaussianProcessRegressor`)`
312
313        Returns:
314
315            model predictions:
316                an array if uncertainty quantification is not requested,
317                  or a tuple if with prediction intervals and simulations
318                  if `return_std = True` (mean, standard deviation,
319                  lower and upper prediction interval) or `return_pi = True`
320                  ()
321
322        """
323
324        if "return_std" in kwargs:
325            alpha = 100 - level
326            pi_multiplier = norm.ppf(1 - alpha / 200)
327
328            if len(X.shape) == 1:
329                n_features = X.shape[0]
330                new_X = mo.rbind(
331                    X.reshape(1, n_features),
332                    np.ones(n_features).reshape(1, n_features),
333                )
334
335                mean_, std_ = self.obj.predict(
336                    self.cook_test_set(new_X, **kwargs), return_std=True
337                )[0]
338
339                preds = self.y_mean_ + mean_
340                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
341                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
342
343                DescribeResults = namedtuple(
344                    "DescribeResults", ["mean", "std", "lower", "upper"]
345                )
346
347                return DescribeResults(preds, std_, lower, upper)
348
349            # len(X.shape) > 1
350            mean_, std_ = self.obj.predict(
351                self.cook_test_set(X, **kwargs), return_std=True
352            )
353
354            preds = self.y_mean_ + mean_
355            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
356            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
357
358            DescribeResults = namedtuple(
359                "DescribeResults", ["mean", "std", "lower", "upper"]
360            )
361
362            return DescribeResults(preds, std_, lower, upper)
363
364        if "return_pi" in kwargs:
365            assert method in (
366                "splitconformal",
367                "localconformal",
368            ), "method must be in ('splitconformal', 'localconformal')"
369            self.pi = PredictionInterval(
370                obj=self,
371                method=method,
372                level=level,
373                type_pi=self.type_pi,
374                replications=self.replications,
375                kernel=self.kernel,
376            )
377
378            if len(self.X_.shape) == 1:
379                if isinstance(X, pd.DataFrame):
380                    self.X_ = pd.DataFrame(
381                        self.X_.values.reshape(1, -1), columns=self.X_.columns
382                    )
383                else:
384                    self.X_ = self.X_.reshape(1, -1)
385                self.y_ = np.array([self.y_])
386
387            self.pi.fit(self.X_, self.y_)
388            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
389            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
390            preds = self.pi.predict(X, return_pi=True)
391            return preds
392
393        # "return_std" not in kwargs
394        if len(X.shape) == 1:
395            n_features = X.shape[0]
396            new_X = mo.rbind(
397                X.reshape(1, n_features),
398                np.ones(n_features).reshape(1, n_features),
399            )
400
401            return (
402                self.y_mean_
403                + self.obj.predict(
404                    self.cook_test_set(new_X, **kwargs), **kwargs
405                )
406            )[0]
407
408        # len(X.shape) > 1
409        return self.y_mean_ + self.obj.predict(
410            self.cook_test_set(X, **kwargs), **kwargs
411        )
412
413    def score(self, X, y, scoring=None):
414        """Compute the score of the model.
415
416        Parameters:
417
418            X: {array-like}, shape = [n_samples, n_features]
419                Training vectors, where n_samples is the number
420                of samples and n_features is the number of features.
421
422            y: array-like, shape = [n_samples]
423                Target values.
424
425            scoring: str
426                scoring method
427
428        Returns:
429
430            score: float
431
432        """
433
434        if scoring is None:
435            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
436
437        return skm2.get_scorer(scoring)(self, X, y)

Custom Regression model

This class is used to 'augment' any regression model with transformed features.

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

type_pi: str.
    type of prediction interval; currently `None` (split or local
    conformal without simulation), "kde" or "bootstrap" (simulated split
    conformal).

replications: int.
    number of replications (if needed) for predictive simulation.
    Used only in `self.predict`, for `self.kernel` in ('gaussian',
    'tophat') and `self.type_pi = 'kde'`. Default is `None`.

kernel: str.
    the kernel to use for kernel density estimation (used for predictive
    simulation in `self.predict`, with `method='splitconformal'` and
    `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

type_split: str.
    Type of splitting for conformal prediction. None (default), or
    "random" (random split of data) or "sequential" (sequential split of data)

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

level: float
    confidence level for prediction intervals

pi_method: str
    method for prediction intervals: 'splitconformal' or 'localconformal'

seed: int
    reproducibility seed for nodes_sim=='uniform'

type_fit: str
    'regression'

backend: str
    "cpu" or "gpu" or "tpu"

Examples:

See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression

def fit(self, X, y, sample_weight=None, **kwargs):
178    def fit(self, X, y, sample_weight=None, **kwargs):
179        """Fit custom model to training data (X, y).
180
181        Parameters:
182
183            X: {array-like}, shape = [n_samples, n_features]
184                Training vectors, where n_samples is the number
185                of samples and n_features is the number of features.
186
187            y: array-like, shape = [n_samples]
188                Target values.
189
190            sample_weight: array-like, shape = [n_samples]
191                Sample weights.
192
193            **kwargs: additional parameters to be passed to
194                self.cook_training_set or self.obj.fit
195
196        Returns:
197
198            self: object
199
200        """
201
202        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
203
204        if self.level is not None:
205            self.obj = PredictionInterval(
206                obj=self.obj, method=self.pi_method, level=self.level
207            )
208
209        # if sample_weights, else: (must use self.row_index)
210        if sample_weight is not None:
211            self.obj.fit(
212                scaled_Z,
213                centered_y,
214                sample_weight=sample_weight[self.index_row_].ravel(),
215                **kwargs
216            )
217
218            return self
219
220        self.obj.fit(scaled_Z, centered_y, **kwargs)
221
222        self.X_ = X
223
224        self.y_ = y
225
226        # Compute SSE
227        centered_y_pred = self.obj.predict(scaled_Z)
228        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)
229
230        # Get number of parameters
231        n_params = (
232            self.n_hidden_features + X.shape[1]
233        )  # hidden features + original features
234        if self.n_clusters > 0:
235            n_params += self.n_clusters  # add clusters if used
236
237        # Compute information criteria
238        n_samples = X.shape[0]
239        temp = n_samples * np.log(self.sse_ / n_samples)
240        self.aic_ = temp + 2 * n_params
241        self.bic_ = temp + np.log(n_samples) * n_params
242
243        if hasattr(self.obj, "coef_"):
244            self.coef_ = self.obj.coef_
245
246        if hasattr(self.obj, "intercept_"):
247            self.intercept_ = self.obj.intercept_
248
249        return self

Fit custom model to training data (X, y).

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.

**kwargs: additional parameters to be passed to
    self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, level=95, method='splitconformal', **kwargs):
289    def predict(self, X, level=95, method="splitconformal", **kwargs):
290        """Predict test data X.
291
292        Parameters:
293
294            X: {array-like}, shape = [n_samples, n_features]
295                Training vectors, where n_samples is the number
296                of samples and n_features is the number of features.
297
298            level: int
299                Level of confidence (default = 95)
300
301            method: str
302                'splitconformal', 'localconformal'
303                prediction (if you specify `return_pi = True`)
304
305            **kwargs: additional parameters
306                    `return_pi = True` for conformal prediction,
307                    with `method` in ('splitconformal', 'localconformal')
308                    or `return_std = True` for `self.obj` in
309                    (`sklearn.linear_model.BayesianRidge`,
310                    `sklearn.linear_model.ARDRegressor`,
311                    `sklearn.gaussian_process.GaussianProcessRegressor`)`
312
313        Returns:
314
315            model predictions:
316                an array if uncertainty quantification is not requested,
317                  or a tuple if with prediction intervals and simulations
318                  if `return_std = True` (mean, standard deviation,
319                  lower and upper prediction interval) or `return_pi = True`
320                  ()
321
322        """
323
324        if "return_std" in kwargs:
325            alpha = 100 - level
326            pi_multiplier = norm.ppf(1 - alpha / 200)
327
328            if len(X.shape) == 1:
329                n_features = X.shape[0]
330                new_X = mo.rbind(
331                    X.reshape(1, n_features),
332                    np.ones(n_features).reshape(1, n_features),
333                )
334
335                mean_, std_ = self.obj.predict(
336                    self.cook_test_set(new_X, **kwargs), return_std=True
337                )[0]
338
339                preds = self.y_mean_ + mean_
340                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
341                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
342
343                DescribeResults = namedtuple(
344                    "DescribeResults", ["mean", "std", "lower", "upper"]
345                )
346
347                return DescribeResults(preds, std_, lower, upper)
348
349            # len(X.shape) > 1
350            mean_, std_ = self.obj.predict(
351                self.cook_test_set(X, **kwargs), return_std=True
352            )
353
354            preds = self.y_mean_ + mean_
355            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
356            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)
357
358            DescribeResults = namedtuple(
359                "DescribeResults", ["mean", "std", "lower", "upper"]
360            )
361
362            return DescribeResults(preds, std_, lower, upper)
363
364        if "return_pi" in kwargs:
365            assert method in (
366                "splitconformal",
367                "localconformal",
368            ), "method must be in ('splitconformal', 'localconformal')"
369            self.pi = PredictionInterval(
370                obj=self,
371                method=method,
372                level=level,
373                type_pi=self.type_pi,
374                replications=self.replications,
375                kernel=self.kernel,
376            )
377
378            if len(self.X_.shape) == 1:
379                if isinstance(X, pd.DataFrame):
380                    self.X_ = pd.DataFrame(
381                        self.X_.values.reshape(1, -1), columns=self.X_.columns
382                    )
383                else:
384                    self.X_ = self.X_.reshape(1, -1)
385                self.y_ = np.array([self.y_])
386
387            self.pi.fit(self.X_, self.y_)
388            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
389            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
390            preds = self.pi.predict(X, return_pi=True)
391            return preds
392
393        # "return_std" not in kwargs
394        if len(X.shape) == 1:
395            n_features = X.shape[0]
396            new_X = mo.rbind(
397                X.reshape(1, n_features),
398                np.ones(n_features).reshape(1, n_features),
399            )
400
401            return (
402                self.y_mean_
403                + self.obj.predict(
404                    self.cook_test_set(new_X, **kwargs), **kwargs
405                )
406            )[0]
407
408        # len(X.shape) > 1
409        return self.y_mean_ + self.obj.predict(
410            self.cook_test_set(X, **kwargs), **kwargs
411        )

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal', 'localconformal'
    prediction (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegressor`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)

Returns:

model predictions:
    an array if uncertainty quantification is not requested,
      or a tuple (with prediction intervals and, where applicable,
      simulations) if `return_std = True` (mean, standard deviation,
      lower and upper prediction interval) or if `return_pi = True`
      (the output of the fitted `PredictionInterval`)
def score(self, X, y, scoring=None):
413    def score(self, X, y, scoring=None):
414        """Compute the score of the model.
415
416        Parameters:
417
418            X: {array-like}, shape = [n_samples, n_features]
419                Training vectors, where n_samples is the number
420                of samples and n_features is the number of features.
421
422            y: array-like, shape = [n_samples]
423                Target values.
424
425            scoring: str
426                scoring method
427
428        Returns:
429
430            score: float
431
432        """
433
434        if scoring is None:
435            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
436
437        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class CustomBackPropRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 18class CustomBackPropRegressor(Custom, RegressorMixin):
 19    """
 20    Finite difference trainer for nnetsauce models.
 21
 22    Parameters
 23    ----------
 24
 25    base_model : str
 26        The name of the base model (e.g., 'RidgeCV').
 27
 28    type_grad : {'finitediff', 'autodiff'}, optional
 29        Type of gradient computation to use (default='finitediff').
 30
 31    lr : float, optional
 32        Learning rate for optimization (default=1e-4).
 33
 34    optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
 35        Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'),
 36        Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
 37
 38    eps : float, optional
 39        Scaling factor for adaptive finite difference step size (default=1e-3).
 40
 41    batch_size : int, optional
 42        Batch size for 'sgd' optimizer (default=32).
 43
 44    alpha : float, optional
 45        Elastic net penalty strength (default=0.0).
 46
 47    l1_ratio : float, optional
 48        Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
 49
 50    type_loss : {'mse', 'quantile'}, optional
 51        Type of loss function to use (default='mse').
 52
 53    q : float, optional
 54        Quantile for quantile loss (default=0.5).
 55
 56    **kwargs
 57        Additional parameters to pass to the scikit-learn model.
 58
 59    """
 60
 61    def __init__(
 62        self,
 63        base_model,
 64        type_grad="finitediff",
 65        lr=1e-4,
 66        optimizer="gd",
 67        eps=1e-3,
 68        batch_size=32,
 69        alpha=0.0,
 70        l1_ratio=0.0,
 71        type_loss="mse",
 72        q=0.5,
 73        backend="cpu",
 74        **kwargs,
 75    ):
 76        super().__init__(base_model, True, **kwargs)
 77        self.base_model = base_model
 78        self.custom_kwargs = kwargs
 79        self.backend = backend
 80        self.model = ns.CustomRegressor(
 81            self.base_model, backend=self.backend, **self.custom_kwargs
 82        )
 83        assert isinstance(
 84            self.model, ns.CustomRegressor
 85        ), "'model' must be of class ns.CustomRegressor"
 86        self.type_grad = type_grad
 87        self.lr = lr
 88        self.optimizer = optimizer
 89        self.eps = eps
 90        self.loss_history_ = []
 91        self.opt_state = None
 92        self.batch_size = batch_size  # for SGD
 93        self.loss_history_ = []
 94        self._cd_index = 0  # For coordinate descent
 95        self.alpha = alpha
 96        self.l1_ratio = l1_ratio
 97        self.type_loss = type_loss
 98        self.q = q
 99
100    def _loss(self, X, y, **kwargs):
101        """
102        Compute the loss (with elastic net penalty) for the current model.
103
104        Parameters
105        ----------
106
107        X : array-like of shape (n_samples, n_features)
108            Input data.
109
110        y : array-like of shape (n_samples,)
111            Target values.
112
113        **kwargs
114            Additional keyword arguments for loss calculation.
115
116        Returns
117        -------
118        float
119            The computed loss value.
120        """
121        y_pred = self.model.predict(X)
122        if self.type_loss == "mse":
123            loss = np.mean((y - y_pred) ** 2)
124        elif self.type_loss == "quantile":
125            loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs)
126        W = self.model.W_
127        l1 = np.sum(np.abs(W))
128        l2 = np.sum(W**2)
129        return loss + self.alpha * (
130            self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2
131        )
132
133    def _compute_grad(self, X, y):
134        """
135        Compute the gradient of the loss with respect to W_ using finite differences.
136
137        Parameters
138        ----------
139
140        X : array-like of shape (n_samples, n_features)
141            Input data.
142
143        y : array-like of shape (n_samples,)
144            Target values.
145
146        Returns
147        -------
148
149        ndarray
150            Gradient array with the same shape as W_.
151        """
152
153        # Finite difference gradient computation
154        W = deepcopy(self.model.W_)
155        shape = W.shape
156        W_flat = W.flatten()
157        n_params = W_flat.size
158
159        # Adaptive finite difference step
160        h_vec = self.eps * np.maximum(1.0, np.abs(W_flat))
161        eye = np.eye(n_params)
162
163        loss_plus = np.zeros(n_params)
164        loss_minus = np.zeros(n_params)
165
166        for i in range(n_params):
167            h_i = h_vec[i]
168            Wp = W_flat.copy()
169            Wp[i] += h_i
170            Wm = W_flat.copy()
171            Wm[i] -= h_i
172
173            self.model.W_ = Wp.reshape(shape)
174            loss_plus[i] = self._loss(X, y)
175
176            self.model.W_ = Wm.reshape(shape)
177            loss_minus[i] = self._loss(X, y)
178
179        grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)
180
181        # Add elastic net gradient
182        l1_grad = self.alpha * self.l1_ratio * np.sign(W)
183        l2_grad = self.alpha * (1 - self.l1_ratio) * W
184        grad += l1_grad + l2_grad
185
186        self.model.W_ = W  # restore original
187        return grad
188
189    def fit(
190        self,
191        X,
192        y,
193        epochs=10,
194        verbose=True,
195        show_progress=True,
196        sample_weight=None,
197        **kwargs,
198    ):
199        """
200        Fit the model using finite difference optimization.
201
202        Parameters
203        ----------
204
205        X : array-like of shape (n_samples, n_features)
206            Training data.
207
208        y : array-like of shape (n_samples,)
209            Target values.
210
211        epochs : int, optional
212            Number of optimization steps (default=10).
213
214        verbose : bool, optional
215            Whether to print progress messages (default=True).
216
217        show_progress : bool, optional
218            Whether to show tqdm progress bar (default=True).
219
220        sample_weight : array-like, optional
221            Sample weights.
222
223        **kwargs
224            Additional keyword arguments.
225
226        Returns
227        -------
228
229        self : object
230            Returns self.
231        """
232
233        self.model.fit(X, y)
234
235        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
236
237        for epoch in iterator:
238            grad = self._compute_grad(X, y)
239
240            if self.optimizer == "gd":
241                self.model.W_ -= self.lr * grad
242                self.model.W_ = np.clip(self.model.W_, 0, 1)
243                # print("self.model.W_", self.model.W_)
244
245            elif self.optimizer == "sgd":
246                # Sample a mini-batch for stochastic gradient
247                n_samples = X.shape[0]
248                idxs = np.random.choice(
249                    n_samples, self.batch_size, replace=False
250                )
251                if isinstance(X, pd.DataFrame):
252                    X_batch = X.iloc[idxs, :]
253                else:
254                    X_batch = X[idxs, :]
255                y_batch = y[idxs]
256                grad = self._compute_grad(X_batch, y_batch)
257
258                self.model.W_ -= self.lr * grad
259                self.model.W_ = np.clip(self.model.W_, 0, 1)
260
261            elif self.optimizer == "adam":
262                if self.opt_state is None:
263                    self.opt_state = {
264                        "m": np.zeros_like(grad),
265                        "v": np.zeros_like(grad),
266                        "t": 0,
267                    }
268                beta1, beta2, eps = 0.9, 0.999, 1e-8
269                self.opt_state["t"] += 1
270                self.opt_state["m"] = (
271                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
272                )
273                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
274                    1 - beta2
275                ) * (grad**2)
276                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
277                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])
278
279                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
280                self.model.W_ = np.clip(self.model.W_, 0, 1)
281                # print("self.model.W_", self.model.W_)
282
283            elif self.optimizer == "cd":  # coordinate descent
284                W_shape = self.model.W_.shape
285                W_flat_size = self.model.W_.size
286                W_flat = self.model.W_.flatten()
287                grad_flat = grad.flatten()
288
289                # Update only one coordinate per epoch (cyclic)
290                idx = self._cd_index % W_flat_size
291                W_flat[idx] -= self.lr * grad_flat[idx]
292                # Clip the updated value
293                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
294
295                # Restore W_
296                self.model.W_ = W_flat.reshape(W_shape)
297
298                self._cd_index += 1
299
300            else:
301                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
302
303            loss = self._loss(X, y)
304            self.loss_history_.append(loss)
305
306            if verbose:
307                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")
308
309        # if sample_weights, else: (must use self.row_index)
310        if sample_weight in kwargs:
311            self.model.fit(
312                X,
313                y,
314                sample_weight=sample_weight[self.index_row_].ravel(),
315                **kwargs,
316            )
317
318            return self
319
320        return self
321
322    def predict(self, X, level=95, method="splitconformal", **kwargs):
323        """
324        Predict using the trained model.
325
326        Parameters
327        ----------
328
329        X : array-like of shape (n_samples, n_features)
330            Input data.
331
332        level : int, optional
333            Level of confidence for prediction intervals (default=95).
334
335        method : {'splitconformal', 'localconformal'}, optional
336            Method for conformal prediction (default='splitconformal').
337
338        **kwargs
339            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
340            or `return_std=True` for standard deviation estimates.
341
342        Returns
343        -------
344
345        array or tuple
346            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
347        """
348        if "return_std" in kwargs:
349            alpha = 100 - level
350            pi_multiplier = norm.ppf(1 - alpha / 200)
351
352            if len(X.shape) == 1:
353                n_features = X.shape[0]
354                new_X = mo.rbind(
355                    X.reshape(1, n_features),
356                    np.ones(n_features).reshape(1, n_features),
357                )
358
359                mean_, std_ = self.model.predict(new_X, return_std=True)[0]
360
361                preds = mean_
362                lower = mean_ - pi_multiplier * std_
363                upper = mean_ + pi_multiplier * std_
364
365                DescribeResults = namedtuple(
366                    "DescribeResults", ["mean", "std", "lower", "upper"]
367                )
368
369                return DescribeResults(preds, std_, lower, upper)
370
371            # len(X.shape) > 1
372            mean_, std_ = self.model.predict(X, return_std=True)
373
374            preds = mean_
375            lower = mean_ - pi_multiplier * std_
376            upper = mean_ + pi_multiplier * std_
377
378            DescribeResults = namedtuple(
379                "DescribeResults", ["mean", "std", "lower", "upper"]
380            )
381
382            return DescribeResults(preds, std_, lower, upper)
383
384        if "return_pi" in kwargs:
385            assert method in (
386                "splitconformal",
387                "localconformal",
388            ), "method must be in ('splitconformal', 'localconformal')"
389            self.pi = ns.PredictionInterval(
390                obj=self,
391                method=method,
392                level=level,
393                type_pi=self.type_pi,
394                replications=self.replications,
395                kernel=self.kernel,
396            )
397
398            if len(self.X_.shape) == 1:
399                if isinstance(X, pd.DataFrame):
400                    self.X_ = pd.DataFrame(
401                        self.X_.values.reshape(1, -1), columns=self.X_.columns
402                    )
403                else:
404                    self.X_ = self.X_.reshape(1, -1)
405                self.y_ = np.array([self.y_])
406
407            self.pi.fit(self.X_, self.y_)
408            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
409            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
410            preds = self.pi.predict(X, return_pi=True)
411            return preds
412
413        # "return_std" not in kwargs
414        if len(X.shape) == 1:
415            n_features = X.shape[0]
416            new_X = mo.rbind(
417                X.reshape(1, n_features),
418                np.ones(n_features).reshape(1, n_features),
419            )
420
421            return (0 + self.model.predict(new_X, **kwargs))[0]
422
423        # len(X.shape) > 1
424        return self.model.predict(X, **kwargs)

Finite difference trainer for nnetsauce models.

Parameters

base_model : str The name of the base model (e.g., 'RidgeCV').

type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').

lr : float, optional Learning rate for optimization (default=1e-4).

optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.

eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).

batch_size : int, optional Batch size for 'sgd' optimizer (default=32).

alpha : float, optional Elastic net penalty strength (default=0.0).

l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).

type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').

q : float, optional Quantile for quantile loss (default=0.5).

**kwargs Additional parameters to pass to the scikit-learn model.

def fit( self, X, y, epochs=10, verbose=True, show_progress=True, sample_weight=None, **kwargs):
189    def fit(
190        self,
191        X,
192        y,
193        epochs=10,
194        verbose=True,
195        show_progress=True,
196        sample_weight=None,
197        **kwargs,
198    ):
199        """
200        Fit the model using finite difference optimization.
201
202        Parameters
203        ----------
204
205        X : array-like of shape (n_samples, n_features)
206            Training data.
207
208        y : array-like of shape (n_samples,)
209            Target values.
210
211        epochs : int, optional
212            Number of optimization steps (default=10).
213
214        verbose : bool, optional
215            Whether to print progress messages (default=True).
216
217        show_progress : bool, optional
218            Whether to show tqdm progress bar (default=True).
219
220        sample_weight : array-like, optional
221            Sample weights.
222
223        **kwargs
224            Additional keyword arguments.
225
226        Returns
227        -------
228
229        self : object
230            Returns self.
231        """
232
233        self.model.fit(X, y)
234
235        iterator = tqdm(range(epochs)) if show_progress else range(epochs)
236
237        for epoch in iterator:
238            grad = self._compute_grad(X, y)
239
240            if self.optimizer == "gd":
241                self.model.W_ -= self.lr * grad
242                self.model.W_ = np.clip(self.model.W_, 0, 1)
243                # print("self.model.W_", self.model.W_)
244
245            elif self.optimizer == "sgd":
246                # Sample a mini-batch for stochastic gradient
247                n_samples = X.shape[0]
248                idxs = np.random.choice(
249                    n_samples, self.batch_size, replace=False
250                )
251                if isinstance(X, pd.DataFrame):
252                    X_batch = X.iloc[idxs, :]
253                else:
254                    X_batch = X[idxs, :]
255                y_batch = y[idxs]
256                grad = self._compute_grad(X_batch, y_batch)
257
258                self.model.W_ -= self.lr * grad
259                self.model.W_ = np.clip(self.model.W_, 0, 1)
260
261            elif self.optimizer == "adam":
262                if self.opt_state is None:
263                    self.opt_state = {
264                        "m": np.zeros_like(grad),
265                        "v": np.zeros_like(grad),
266                        "t": 0,
267                    }
268                beta1, beta2, eps = 0.9, 0.999, 1e-8
269                self.opt_state["t"] += 1
270                self.opt_state["m"] = (
271                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
272                )
273                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
274                    1 - beta2
275                ) * (grad**2)
276                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
277                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])
278
279                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
280                self.model.W_ = np.clip(self.model.W_, 0, 1)
281                # print("self.model.W_", self.model.W_)
282
283            elif self.optimizer == "cd":  # coordinate descent
284                W_shape = self.model.W_.shape
285                W_flat_size = self.model.W_.size
286                W_flat = self.model.W_.flatten()
287                grad_flat = grad.flatten()
288
289                # Update only one coordinate per epoch (cyclic)
290                idx = self._cd_index % W_flat_size
291                W_flat[idx] -= self.lr * grad_flat[idx]
292                # Clip the updated value
293                W_flat[idx] = np.clip(W_flat[idx], 0, 1)
294
295                # Restore W_
296                self.model.W_ = W_flat.reshape(W_shape)
297
298                self._cd_index += 1
299
300            else:
301                raise ValueError(f"Unsupported optimizer: {self.optimizer}")
302
303            loss = self._loss(X, y)
304            self.loss_history_.append(loss)
305
306            if verbose:
307                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")
308
309        # if sample_weights, else: (must use self.row_index)
310        if sample_weight in kwargs:
311            self.model.fit(
312                X,
313                y,
314                sample_weight=sample_weight[self.index_row_].ravel(),
315                **kwargs,
316            )
317
318            return self
319
320        return self

Fit the model using finite difference optimization.

Parameters

X : array-like of shape (n_samples, n_features) Training data.

y : array-like of shape (n_samples,) Target values.

epochs : int, optional Number of optimization steps (default=10).

verbose : bool, optional Whether to print progress messages (default=True).

show_progress : bool, optional Whether to show tqdm progress bar (default=True).

sample_weight : array-like, optional Sample weights.

**kwargs Additional keyword arguments.

Returns

self : object Returns self.

def predict(self, X, level=95, method='splitconformal', **kwargs):
322    def predict(self, X, level=95, method="splitconformal", **kwargs):
323        """
324        Predict using the trained model.
325
326        Parameters
327        ----------
328
329        X : array-like of shape (n_samples, n_features)
330            Input data.
331
332        level : int, optional
333            Level of confidence for prediction intervals (default=95).
334
335        method : {'splitconformal', 'localconformal'}, optional
336            Method for conformal prediction (default='splitconformal').
337
338        **kwargs
339            Additional keyword arguments. Use `return_pi=True` for prediction intervals,
340            or `return_std=True` for standard deviation estimates.
341
342        Returns
343        -------
344
345        array or tuple
346            Model predictions, or a tuple with prediction intervals or standard deviations if requested.
347        """
348        if "return_std" in kwargs:
349            alpha = 100 - level
350            pi_multiplier = norm.ppf(1 - alpha / 200)
351
352            if len(X.shape) == 1:
353                n_features = X.shape[0]
354                new_X = mo.rbind(
355                    X.reshape(1, n_features),
356                    np.ones(n_features).reshape(1, n_features),
357                )
358
359                mean_, std_ = self.model.predict(new_X, return_std=True)[0]
360
361                preds = mean_
362                lower = mean_ - pi_multiplier * std_
363                upper = mean_ + pi_multiplier * std_
364
365                DescribeResults = namedtuple(
366                    "DescribeResults", ["mean", "std", "lower", "upper"]
367                )
368
369                return DescribeResults(preds, std_, lower, upper)
370
371            # len(X.shape) > 1
372            mean_, std_ = self.model.predict(X, return_std=True)
373
374            preds = mean_
375            lower = mean_ - pi_multiplier * std_
376            upper = mean_ + pi_multiplier * std_
377
378            DescribeResults = namedtuple(
379                "DescribeResults", ["mean", "std", "lower", "upper"]
380            )
381
382            return DescribeResults(preds, std_, lower, upper)
383
384        if "return_pi" in kwargs:
385            assert method in (
386                "splitconformal",
387                "localconformal",
388            ), "method must be in ('splitconformal', 'localconformal')"
389            self.pi = ns.PredictionInterval(
390                obj=self,
391                method=method,
392                level=level,
393                type_pi=self.type_pi,
394                replications=self.replications,
395                kernel=self.kernel,
396            )
397
398            if len(self.X_.shape) == 1:
399                if isinstance(X, pd.DataFrame):
400                    self.X_ = pd.DataFrame(
401                        self.X_.values.reshape(1, -1), columns=self.X_.columns
402                    )
403                else:
404                    self.X_ = self.X_.reshape(1, -1)
405                self.y_ = np.array([self.y_])
406
407            self.pi.fit(self.X_, self.y_)
408            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
409            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
410            preds = self.pi.predict(X, return_pi=True)
411            return preds
412
413        # "return_std" not in kwargs
414        if len(X.shape) == 1:
415            n_features = X.shape[0]
416            new_X = mo.rbind(
417                X.reshape(1, n_features),
418                np.ones(n_features).reshape(1, n_features),
419            )
420
421            return (0 + self.model.predict(new_X, **kwargs))[0]
422
423        # len(X.shape) > 1
424        return self.model.predict(X, **kwargs)

Predict using the trained model.

Parameters

X : array-like of shape (n_samples, n_features) Input data.

level : int, optional Level of confidence for prediction intervals (default=95).

method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').

**kwargs Additional keyword arguments. Use return_pi=True for prediction intervals, or return_std=True for standard deviation estimates.

Returns

array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.

class DeepClassifier(nnetsauce.CustomClassifier, sklearn.base.ClassifierMixin):
 36class DeepClassifier(CustomClassifier, ClassifierMixin):
 37    """
 38    Deep Classifier
 39
 40    Parameters:
 41
 42        obj: an object
 43            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 44
 45        n_layers: int (default=3)
 46            Number of layers. `n_layers = 1` is a simple `CustomClassifier`
 47
 48        verbose : int, optional (default=0)
 49            Monitor progress when fitting.
 50
 51        All the other parameters are nnetsauce `CustomClassifier`'s
 52
 53    Examples:
 54
 55        ```python
 56        import nnetsauce as ns
 57        from sklearn.datasets import load_breast_cancer
 58        from sklearn.model_selection import train_test_split
 59        from sklearn.linear_model import LogisticRegressionCV
 60        data = load_breast_cancer()
 61        X = data.data
 62        y= data.target
 63        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 64        obj = LogisticRegressionCV()
 65        clf = ns.DeepClassifier(obj)
 66        clf.fit(X_train, y_train)
 67        print(clf.score(clf.predict(X_test), y_test))
 68        ```
 69    """
 70
 71    _estimator_type = "classifier"
 72
 73    def __init__(
 74        self,
 75        obj,
 76        # Defining depth
 77        n_layers=3,
 78        verbose=0,
 79        # CustomClassifier attributes
 80        n_hidden_features=5,
 81        activation_name="relu",
 82        a=0.01,
 83        nodes_sim="sobol",
 84        bias=True,
 85        dropout=0,
 86        direct_link=True,
 87        n_clusters=2,
 88        cluster_encode=True,
 89        type_clust="kmeans",
 90        type_scaling=("std", "std", "std"),
 91        col_sample=1,
 92        row_sample=1,
 93        cv_calibration=2,
 94        calibration_method="sigmoid",
 95        seed=123,
 96        backend="cpu",
 97    ):
 98        super().__init__(
 99            obj=obj,
100            n_hidden_features=n_hidden_features,
101            activation_name=activation_name,
102            a=a,
103            nodes_sim=nodes_sim,
104            bias=bias,
105            dropout=dropout,
106            direct_link=direct_link,
107            n_clusters=n_clusters,
108            cluster_encode=cluster_encode,
109            type_clust=type_clust,
110            type_scaling=type_scaling,
111            col_sample=col_sample,
112            row_sample=row_sample,
113            seed=seed,
114            backend=backend,
115        )
116        self.coef_ = None
117        self.intercept_ = None
118        self.type_fit = "classification"
119        self.cv_calibration = cv_calibration
120        self.calibration_method = calibration_method
121
122        # Only wrap in CalibratedClassifierCV if not already wrapped
123        # if not isinstance(obj, CalibratedClassifierCV):
124        #     self.obj = CalibratedClassifierCV(
125        #         self.obj,
126        #         cv=self.cv_calibration,
127        #         method=self.calibration_method
128        #     )
129        # else:
130        self.coef_ = None
131        self.intercept_ = None
132        self.type_fit = "classification"
133        self.cv_calibration = cv_calibration
134        self.calibration_method = calibration_method
135        self.obj = obj
136
137        assert n_layers >= 1, "must have n_layers >= 1"
138        self.stacked_obj = obj
139        self.verbose = verbose
140        self.n_layers = n_layers
141        self.classes_ = None
142        self.n_classes_ = None
143
144    def fit(self, X, y, **kwargs):
145        """Fit Classification algorithms to X and y.
146        Parameters
147        ----------
148        X : array-like,
149            Training vectors, where rows is the number of samples
150            and columns is the number of features.
151        y : array-like,
152            Training vectors, where rows is the number of samples
153            and columns is the number of features.
154        **kwargs: dict
155            Additional parameters to be passed to the fit method
156            of the base learner. For example, `sample_weight`.
157
158        Returns
159        -------
160        A fitted object
161        """
162
163        self.classes_ = np.unique(y)
164        self.n_classes_ = len(
165            self.classes_
166        )  # for compatibility with         scikit-learn
167
168        if isinstance(X, np.ndarray):
169            X = pd.DataFrame(X)
170
171        # init layer
172        self.stacked_obj = CustomClassifier(
173            obj=self.stacked_obj,
174            n_hidden_features=self.n_hidden_features,
175            activation_name=self.activation_name,
176            a=self.a,
177            nodes_sim=self.nodes_sim,
178            bias=self.bias,
179            dropout=self.dropout,
180            direct_link=self.direct_link,
181            n_clusters=self.n_clusters,
182            cluster_encode=self.cluster_encode,
183            type_clust=self.type_clust,
184            type_scaling=self.type_scaling,
185            col_sample=self.col_sample,
186            row_sample=self.row_sample,
187            cv_calibration=None,
188            calibration_method=None,
189            seed=self.seed,
190            backend=self.backend,
191        )
192
193        if self.verbose > 0:
194            iterator = tqdm(range(self.n_layers - 1))
195        else:
196            iterator = range(self.n_layers - 1)
197
198        for _ in iterator:
199            self.stacked_obj = deepcopy(
200                CustomClassifier(
201                    obj=self.stacked_obj,
202                    n_hidden_features=self.n_hidden_features,
203                    activation_name=self.activation_name,
204                    a=self.a,
205                    nodes_sim=self.nodes_sim,
206                    bias=self.bias,
207                    dropout=self.dropout,
208                    direct_link=self.direct_link,
209                    n_clusters=self.n_clusters,
210                    cluster_encode=self.cluster_encode,
211                    type_clust=self.type_clust,
212                    type_scaling=self.type_scaling,
213                    col_sample=self.col_sample,
214                    row_sample=self.row_sample,
215                    cv_calibration=None,
216                    calibration_method=None,
217                    seed=self.seed,
218                    backend=self.backend,
219                )
220            )
221            self.stacked_obj.fit(X, y, **kwargs)
222
223        return self
224
225    def partial_fit(self, X, y, **kwargs):
226        """Fit Regression algorithms to X and y.
227        Parameters
228        ----------
229        X : array-like,
230            Training vectors, where rows is the number of samples
231            and columns is the number of features.
232        y : array-like,
233            Training vectors, where rows is the number of samples
234            and columns is the number of features.
235        **kwargs: dict
236            Additional parameters to be passed to the fit method
237            of the base learner. For example, `sample_weight`.
238        Returns
239        -------
240        A fitted object
241        """
242        assert hasattr(self, "stacked_obj"), "model must be fitted first"
243        current_obj = self.stacked_obj
244        for _ in range(self.n_layers):
245            try:
246                input_X = current_obj.obj.cook_test_set(X)
247                current_obj.obj.partial_fit(input_X, y, **kwargs)
248                try:
249                    current_obj = current_obj.obj
250                except AttributeError:
251                    pass
252            except ValueError:
253                pass
254        return self
255
256    def predict(self, X):
257        return self.stacked_obj.predict(X)
258
259    def predict_proba(self, X):
260        return self.stacked_obj.predict_proba(X)
261
262    def score(self, X, y, scoring=None):
263        return self.stacked_obj.score(X, y, scoring)
264
265    def cross_val_optim(
266        self,
267        X_train,
268        y_train,
269        X_test=None,
270        y_test=None,
271        scoring="accuracy",
272        surrogate_obj=None,
273        cv=5,
274        n_jobs=None,
275        n_init=10,
276        n_iter=190,
277        abs_tol=1e-3,
278        verbose=2,
279        seed=123,
280        **kwargs,
281    ):
282        """Cross-validation function and hyperparameters' search
283
284        Parameters:
285
286            X_train: array-like,
287                Training vectors, where rows is the number of samples
288                and columns is the number of features.
289
290            y_train: array-like,
291                Training vectors, where rows is the number of samples
292                and columns is the number of features.
293
294            X_test: array-like,
295                Testing vectors, where rows is the number of samples
296                and columns is the number of features.
297
298            y_test: array-like,
299                Testing vectors, where rows is the number of samples
300                and columns is the number of features.
301
302            scoring: str
303                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
304
305            surrogate_obj: an object;
306                An ML model for estimating the uncertainty around the objective function
307
308            cv: int;
309                number of cross-validation folds
310
311            n_jobs: int;
312                number of jobs for parallel execution
313
314            n_init: an integer;
315                number of points in the initial setting, when `x_init` and `y_init` are not provided
316
317            n_iter: an integer;
318                number of iterations of the minimization algorithm
319
320            abs_tol: a float;
321                tolerance for convergence of the optimizer (early stopping based on acquisition function)
322
323            verbose: int
324                controls verbosity
325
326            seed: int
327                reproducibility seed
328
329            **kwargs: dict
330                additional parameters to be passed to the estimator
331
332        Examples:
333
334            ```python
335            ```
336        """
337
338        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
339        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
340        num_to_type_clust = {1: "kmeans", 2: "gmm"}
341
342        def deepclassifier_cv(
343            X_train,
344            y_train,
345            # Defining depth
346            n_layers=3,
347            # CustomClassifier attributes
348            n_hidden_features=5,
349            activation_name="relu",
350            nodes_sim="sobol",
351            dropout=0,
352            n_clusters=2,
353            type_clust="kmeans",
354            cv=5,
355            n_jobs=None,
356            scoring="accuracy",
357            seed=123,
358        ):
359            self.set_params(
360                **{
361                    "n_layers": n_layers,
362                    # CustomClassifier attributes
363                    "n_hidden_features": n_hidden_features,
364                    "activation_name": activation_name,
365                    "nodes_sim": nodes_sim,
366                    "dropout": dropout,
367                    "n_clusters": n_clusters,
368                    "type_clust": type_clust,
369                    **kwargs,
370                }
371            )
372            return -cross_val_score(
373                estimator=self,
374                X=X_train,
375                y=y_train,
376                scoring=scoring,
377                cv=cv,
378                n_jobs=n_jobs,
379                verbose=0,
380            ).mean()
381
382        # objective function for hyperparams tuning
383        def crossval_objective(xx):
384            return deepclassifier_cv(
385                X_train=X_train,
386                y_train=y_train,
387                # Defining depth
388                n_layers=int(np.ceil(xx[0])),
389                # CustomClassifier attributes
390                n_hidden_features=int(np.ceil(xx[1])),
391                activation_name=num_to_activation_name[np.ceil(xx[2])],
392                nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))],
393                dropout=xx[4],
394                n_clusters=int(np.ceil(xx[5])),
395                type_clust=num_to_type_clust[int(np.ceil(xx[6]))],
396                cv=cv,
397                n_jobs=n_jobs,
398                scoring=scoring,
399                seed=seed,
400            )
401
402        if surrogate_obj is None:
403            gp_opt = gp.GPOpt(
404                objective_func=crossval_objective,
405                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
406                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
407                params_names=[
408                    "n_layers",
409                    # CustomClassifier attributes
410                    "n_hidden_features",
411                    "activation_name",
412                    "nodes_sim",
413                    "dropout",
414                    "n_clusters",
415                    "type_clust",
416                ],
417                method="bayesian",
418                n_init=n_init,
419                n_iter=n_iter,
420                seed=seed,
421            )
422        else:
423            gp_opt = gp.GPOpt(
424                objective_func=crossval_objective,
425                lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
426                upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
427                params_names=[
428                    "n_layers",
429                    # CustomClassifier attributes
430                    "n_hidden_features",
431                    "activation_name",
432                    "nodes_sim",
433                    "dropout",
434                    "n_clusters",
435                    "type_clust",
436                ],
437                acquisition="ucb",
438                method="splitconformal",
439                surrogate_obj=ns.PredictionInterval(
440                    obj=surrogate_obj, method="splitconformal"
441                ),
442                n_init=n_init,
443                n_iter=n_iter,
444                seed=seed,
445            )
446
447        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
448        res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"]))
449        res.best_params["n_hidden_features"] = int(
450            np.ceil(res.best_params["n_hidden_features"])
451        )
452        res.best_params["activation_name"] = num_to_activation_name[
453            np.ceil(res.best_params["activation_name"])
454        ]
455        res.best_params["nodes_sim"] = num_to_nodes_sim[
456            int(np.ceil(res.best_params["nodes_sim"]))
457        ]
458        res.best_params["dropout"] = res.best_params["dropout"]
459        res.best_params["n_clusters"] = int(
460            np.ceil(res.best_params["n_clusters"])
461        )
462        res.best_params["type_clust"] = num_to_type_clust[
463            int(np.ceil(res.best_params["type_clust"]))
464        ]
465
466        # out-of-sample error
467        if X_test is not None and y_test is not None:
468            self.set_params(**res.best_params, verbose=0, seed=seed)
469            preds = self.fit(X_train, y_train).predict(X_test)
470            # check error on y_test
471            oos_err = getattr(metrics, scoring + "_score")(
472                y_true=y_test, y_pred=preds
473            )
474            result = namedtuple("result", res._fields + ("test_" + scoring,))
475            return result(*res, oos_err)
476        else:
477            return res
478
479    def lazy_cross_val_optim(
480        self,
481        X_train,
482        y_train,
483        X_test=None,
484        y_test=None,
485        scoring="accuracy",
486        surrogate_objs=None,
487        customize=False,
488        cv=5,
489        n_jobs=None,
490        n_init=10,
491        n_iter=190,
492        abs_tol=1e-3,
493        verbose=1,
494        seed=123,
495    ):
496        """Automated Cross-validation function and hyperparameters' search using multiple surrogates
497
498        Parameters:
499
500            X_train: array-like,
501                Training vectors, where rows is the number of samples
502                and columns is the number of features.
503
504            y_train: array-like,
505                Training vectors, where rows is the number of samples
506                and columns is the number of features.
507
508            X_test: array-like,
509                Testing vectors, where rows is the number of samples
510                and columns is the number of features.
511
512            y_test: array-like,
513                Testing vectors, where rows is the number of samples
514                and columns is the number of features.
515
516            scoring: str
517                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules
518
519            surrogate_objs: object names as a list of strings;
520                ML models for estimating the uncertainty around the objective function
521
522            customize: boolean
523                if True, the surrogate is transformed into a quasi-randomized network (default is False)
524
525            cv: int;
526                number of cross-validation folds
527
528            n_jobs: int;
529                number of jobs for parallel execution
530
531            n_init: an integer;
532                number of points in the initial setting, when `x_init` and `y_init` are not provided
533
534            n_iter: an integer;
535                number of iterations of the minimization algorithm
536
537            abs_tol: a float;
538                tolerance for convergence of the optimizer (early stopping based on acquisition function)
539
540            verbose: int
541                controls verbosity
542
543            seed: int
544                reproducibility seed
545
546        Examples:
547
548            ```python
549            ```
550        """
551
552        removed_regressors = [
553            "TheilSenRegressor",
554            "ARDRegression",
555            "CCA",
556            "GaussianProcessRegressor",
557            "GradientBoostingRegressor",
558            "HistGradientBoostingRegressor",
559            "IsotonicRegression",
560            "MultiOutputRegressor",
561            "MultiTaskElasticNet",
562            "MultiTaskElasticNetCV",
563            "MultiTaskLasso",
564            "MultiTaskLassoCV",
565            "OrthogonalMatchingPursuit",
566            "OrthogonalMatchingPursuitCV",
567            "PLSCanonical",
568            "PLSRegression",
569            "RadiusNeighborsRegressor",
570            "RegressorChain",
571            "StackingRegressor",
572            "VotingRegressor",
573        ]
574
575        results = []
576
577        for est in all_estimators():
578            if surrogate_objs is None:
579                if issubclass(est[1], RegressorMixin) and (
580                    est[0] not in removed_regressors
581                ):
582                    try:
583                        if customize == True:
584                            surr_obj = ns.CustomClassifier(obj=est[1]())
585                        else:
586                            surr_obj = est[1]()
587                        res = self.cross_val_optim(
588                            X_train=X_train,
589                            y_train=y_train,
590                            X_test=X_test,
591                            y_test=y_test,
592                            surrogate_obj=surr_obj,
593                            cv=cv,
594                            n_jobs=n_jobs,
595                            scoring=scoring,
596                            n_init=n_init,
597                            n_iter=n_iter,
598                            abs_tol=abs_tol,
599                            verbose=verbose,
600                            seed=seed,
601                        )
602                        if customize == True:
603                            results.append((f"CustomClassifier({est[0]})", res))
604                        else:
605                            results.append((est[0], res))
606                    except:
607                        pass
608
609            else:
610                if (
611                    issubclass(est[1], RegressorMixin)
612                    and (est[0] not in removed_regressors)
613                    and est[0] in surrogate_objs
614                ):
615                    try:
616                        if customize == True:
617                            surr_obj = ns.CustomClassifier(obj=est[1]())
618                        else:
619                            surr_obj = est[1]()
620                        res = self.cross_val_optim(
621                            X_train=X_train,
622                            y_train=y_train,
623                            X_test=X_test,
624                            y_test=y_test,
625                            surrogate_obj=surr_obj,
626                            cv=cv,
627                            n_jobs=n_jobs,
628                            scoring=scoring,
629                            n_init=n_init,
630                            n_iter=n_iter,
631                            abs_tol=abs_tol,
632                            verbose=verbose,
633                            seed=seed,
634                        )
635                        if customize == True:
636                            results.append((f"CustomClassifier({est[0]})", res))
637                        else:
638                            results.append((est[0], res))
639                    except:
640                        pass
641
642        return results
643
644    @property
645    def _estimator_type(self):
646        return "classifier"

Deep Classifier

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

n_layers: int (default=3)
    Number of layers. `n_layers = 1` is a simple `CustomClassifier`

verbose : int, optional (default=0)
    Monitor progress when fitting.

All the other parameters are nnetsauce `CustomClassifier`'s

Examples:

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
144    def fit(self, X, y, **kwargs):
145        """Fit Classification algorithms to X and y.
146        Parameters
147        ----------
148        X : array-like,
149            Training vectors, where rows is the number of samples
150            and columns is the number of features.
151        y : array-like,
152            Training vectors, where rows is the number of samples
153            and columns is the number of features.
154        **kwargs: dict
155            Additional parameters to be passed to the fit method
156            of the base learner. For example, `sample_weight`.
157
158        Returns
159        -------
160        A fitted object
161        """
162
163        self.classes_ = np.unique(y)
164        self.n_classes_ = len(
165            self.classes_
166        )  # for compatibility with         scikit-learn
167
168        if isinstance(X, np.ndarray):
169            X = pd.DataFrame(X)
170
171        # init layer
172        self.stacked_obj = CustomClassifier(
173            obj=self.stacked_obj,
174            n_hidden_features=self.n_hidden_features,
175            activation_name=self.activation_name,
176            a=self.a,
177            nodes_sim=self.nodes_sim,
178            bias=self.bias,
179            dropout=self.dropout,
180            direct_link=self.direct_link,
181            n_clusters=self.n_clusters,
182            cluster_encode=self.cluster_encode,
183            type_clust=self.type_clust,
184            type_scaling=self.type_scaling,
185            col_sample=self.col_sample,
186            row_sample=self.row_sample,
187            cv_calibration=None,
188            calibration_method=None,
189            seed=self.seed,
190            backend=self.backend,
191        )
192
193        if self.verbose > 0:
194            iterator = tqdm(range(self.n_layers - 1))
195        else:
196            iterator = range(self.n_layers - 1)
197
198        for _ in iterator:
199            self.stacked_obj = deepcopy(
200                CustomClassifier(
201                    obj=self.stacked_obj,
202                    n_hidden_features=self.n_hidden_features,
203                    activation_name=self.activation_name,
204                    a=self.a,
205                    nodes_sim=self.nodes_sim,
206                    bias=self.bias,
207                    dropout=self.dropout,
208                    direct_link=self.direct_link,
209                    n_clusters=self.n_clusters,
210                    cluster_encode=self.cluster_encode,
211                    type_clust=self.type_clust,
212                    type_scaling=self.type_scaling,
213                    col_sample=self.col_sample,
214                    row_sample=self.row_sample,
215                    cv_calibration=None,
216                    calibration_method=None,
217                    seed=self.seed,
218                    backend=self.backend,
219                )
220            )
221            self.stacked_obj.fit(X, y, **kwargs)
222
223        return self

Fit Classification algorithms to X and y.

Parameters

X : array-like, Training vectors, where rows is the number of samples and columns is the number of features. y : array-like, Target values (class labels), one per sample. **kwargs: dict Additional parameters to be passed to the fit method of the base learner. For example, sample_weight.

Returns

A fitted object

def predict(self, X):
256    def predict(self, X):
257        return self.stacked_obj.predict(X)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X):
259    def predict_proba(self, X):
260        return self.stacked_obj.predict_proba(X)

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
262    def score(self, X, y, scoring=None):
263        return self.stacked_obj.score(X, y, scoring)

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class DeepRegressor(nnetsauce.CustomRegressor, sklearn.base.RegressorMixin):
 13class DeepRegressor(CustomRegressor, RegressorMixin):
 14    """
 15    Deep Regressor
 16
 17    Parameters:
 18
 19        obj: an object
 20            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
 21
 22        verbose : int, optional (default=0)
 23            Monitor progress when fitting.
 24
 25        n_layers: int (default=2)
 26            Number of layers. `n_layers = 1` is a simple `CustomRegressor`
 27
 28        All the other parameters are nnetsauce `CustomRegressor`'s
 29
 30    Examples:
 31
 32        ```python
 33        import nnetsauce as ns
 34        from sklearn.datasets import load_diabetes
 35        from sklearn.model_selection import train_test_split
 36        from sklearn.linear_model import RidgeCV
 37        data = load_diabetes()
 38        X = data.data
 39        y= data.target
 40        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
 41        obj = RidgeCV()
 42        clf = ns.DeepRegressor(obj)
 43        clf.fit(X_train, y_train)
 44        print(clf.score(clf.predict(X_test), y_test))
 45        ```
 46
 47    """
 48
 49    def __init__(
 50        self,
 51        obj,
 52        # Defining depth
 53        n_layers=2,
 54        verbose=0,
 55        # CustomRegressor attributes
 56        n_hidden_features=5,
 57        activation_name="relu",
 58        a=0.01,
 59        nodes_sim="sobol",
 60        bias=True,
 61        dropout=0,
 62        direct_link=True,
 63        n_clusters=2,
 64        cluster_encode=True,
 65        type_clust="kmeans",
 66        type_scaling=("std", "std", "std"),
 67        col_sample=1,
 68        row_sample=1,
 69        level=None,
 70        pi_method="splitconformal",
 71        seed=123,
 72        backend="cpu",
 73    ):
 74        super().__init__(
 75            obj=obj,
 76            n_hidden_features=n_hidden_features,
 77            activation_name=activation_name,
 78            a=a,
 79            nodes_sim=nodes_sim,
 80            bias=bias,
 81            dropout=dropout,
 82            direct_link=direct_link,
 83            n_clusters=n_clusters,
 84            cluster_encode=cluster_encode,
 85            type_clust=type_clust,
 86            type_scaling=type_scaling,
 87            col_sample=col_sample,
 88            row_sample=row_sample,
 89            level=level,
 90            pi_method=pi_method,
 91            seed=seed,
 92            backend=backend,
 93        )
 94
 95        assert n_layers >= 1, "must have n_layers >= 1"
 96
 97        self.stacked_obj = deepcopy(obj)
 98        self.verbose = verbose
 99        self.n_layers = n_layers
100        self.level = level
101        self.pi_method = pi_method
102        self.coef_ = None
103
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Training vectors, where rows is the number of samples
113            and columns is the number of features.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self
195
196    def partial_fit(self, X, y, **kwargs):
197        """Fit Regression algorithms to X and y.
198        Parameters
199        ----------
200        X : array-like,
201            Training vectors, where rows is the number of samples
202            and columns is the number of features.
203        y : array-like,
204            Training vectors, where rows is the number of samples
205            and columns is the number of features.
206        **kwargs: dict
207            Additional parameters to be passed to the fit method
208            of the base learner. For example, `sample_weight`.
209        Returns
210        -------
211        A fitted object
212        """
213        assert hasattr(self, "stacked_obj"), "model must be fitted first"
214        current_obj = self.stacked_obj
215        for _ in range(self.n_layers):
216            try:
217                input_X = current_obj.obj.cook_test_set(X)
218                current_obj.obj.partial_fit(input_X, y, **kwargs)
219                try:
220                    current_obj = current_obj.obj
221                except AttributeError:
222                    pass
223            except ValueError as e:
224                print(e)
225                pass
226        return self
227
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)
232
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Deep Regressor

Parameters:

obj: an object
    A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

verbose : int, optional (default=0)
    Monitor progress when fitting.

n_layers: int (default=2)
    Number of layers. `n_layers = 1` is a simple `CustomRegressor`

All the other parameters are nnetsauce `CustomRegressor`'s

Examples:

import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
104    def fit(self, X, y, **kwargs):
105        """Fit Regression algorithms to X and y.
106        Parameters
107        ----------
108        X : array-like,
109            Training vectors, where rows is the number of samples
110            and columns is the number of features.
111        y : array-like,
112            Training vectors, where rows is the number of samples
113            and columns is the number of features.
114        **kwargs: dict
115            Additional parameters to be passed to the fit method
116            of the base learner. For example, `sample_weight`.
117        Returns
118        -------
119        A fitted object
120        """
121
122        if isinstance(X, np.ndarray):
123            X = pd.DataFrame(X)
124
125        # init layer
126        self.stacked_obj = CustomRegressor(
127            obj=self.stacked_obj,
128            n_hidden_features=self.n_hidden_features,
129            activation_name=self.activation_name,
130            a=self.a,
131            nodes_sim=self.nodes_sim,
132            bias=self.bias,
133            dropout=self.dropout,
134            direct_link=self.direct_link,
135            n_clusters=self.n_clusters,
136            cluster_encode=self.cluster_encode,
137            type_clust=self.type_clust,
138            type_scaling=self.type_scaling,
139            col_sample=self.col_sample,
140            row_sample=self.row_sample,
141            seed=self.seed,
142            backend=self.backend,
143        )
144
145        if self.verbose > 0:
146            iterator = tqdm(range(self.n_layers - 1))
147        else:
148            iterator = range(self.n_layers - 1)
149
150        for _ in iterator:
151            self.stacked_obj = deepcopy(
152                CustomRegressor(
153                    obj=self.stacked_obj,
154                    n_hidden_features=self.n_hidden_features,
155                    activation_name=self.activation_name,
156                    a=self.a,
157                    nodes_sim=self.nodes_sim,
158                    bias=self.bias,
159                    dropout=self.dropout,
160                    direct_link=self.direct_link,
161                    n_clusters=self.n_clusters,
162                    cluster_encode=self.cluster_encode,
163                    type_clust=self.type_clust,
164                    type_scaling=self.type_scaling,
165                    col_sample=self.col_sample,
166                    row_sample=self.row_sample,
167                    seed=self.seed,
168                    backend=self.backend,
169                )
170            )
171
172        self.stacked_obj.fit(X, y, **kwargs)
173
174        if self.level is not None:
175            self.stacked_obj = PredictionInterval(
176                obj=self.stacked_obj, method=self.pi_method, level=self.level
177            )
178
179        if hasattr(self.stacked_obj, "clustering_obj_"):
180            self.clustering_obj_ = self.stacked_obj.clustering_obj_
181
182        if hasattr(self.stacked_obj, "coef_"):
183            self.coef_ = self.stacked_obj.coef_
184
185        if hasattr(self.stacked_obj, "scaler_"):
186            self.scaler_ = self.stacked_obj.scaler_
187
188        if hasattr(self.stacked_obj, "nn_scaler_"):
189            self.nn_scaler_ = self.stacked_obj.nn_scaler_
190
191        if hasattr(self.stacked_obj, "clustering_scaler_"):
192            self.clustering_scaler_ = self.stacked_obj.clustering_scaler_
193
194        return self

Fit Regression algorithms to X and y.

Parameters

X : array-like, Training vectors, where rows is the number of samples and columns is the number of features. y : array-like, Target values, one per training sample. **kwargs: dict Additional parameters to be passed to the fit method of the base learner. For example, sample_weight.

Returns

A fitted object

def predict(self, X, **kwargs):
228    def predict(self, X, **kwargs):
229        if self.level is not None:
230            return self.stacked_obj.predict(X, return_pi=True)
231        return self.stacked_obj.predict(X, **kwargs)

Predict test data X.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

level: int
    Level of confidence (default = 95)

method: str
    'splitconformal', 'localconformal'
    prediction (if you specify `return_pi = True`)

**kwargs: additional parameters
        `return_pi = True` for conformal prediction,
        with `method` in ('splitconformal', 'localconformal')
        or `return_std = True` for `self.obj` in
        (`sklearn.linear_model.BayesianRidge`,
        `sklearn.linear_model.ARDRegressor`,
        `sklearn.gaussian_process.GaussianProcessRegressor`)`

Returns:

model predictions:
    an array if uncertainty quantification is not requested,
      or a tuple if with prediction intervals and simulations
      if `return_std = True` (mean, standard deviation,
      lower and upper prediction interval) or `return_pi = True`
      ()
def score(self, X, y, scoring=None):
233    def score(self, X, y, scoring=None):
234        return self.stacked_obj.score(X, y, scoring)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class DeepMTS(nnetsauce.MTS):
 11class DeepMTS(MTS):
 12    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
 13
 14    Parameters:
 15
 16        obj: object.
 17            any object containing a method fit (obj.fit()) and a method predict
 18            (obj.predict()).
 19
 20        n_layers: int.
 21            number of layers in the neural network.
 22
 23        n_hidden_features: int.
 24            number of nodes in the hidden layer.
 25
 26        activation_name: str.
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
 28
 29        a: float.
 30            hyperparameter for 'prelu' or 'elu' activation function.
 31
 32        nodes_sim: str.
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'.
 35
 36        bias: boolean.
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False).
 39
 40        dropout: float.
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training.
 43
 44        direct_link: boolean.
 45            indicates if the original predictors are included (True) in model's fitting or not (False).
 46
 47        n_clusters: int.
 48            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
 49
 50        cluster_encode: bool.
 51            defines how the variable containing clusters is treated (default is one-hot)
 52            if `False`, then labels are used, without one-hot encoding.
 53
 54        type_clust: str.
 55            type of clustering method: currently k-means ('kmeans') or Gaussian
 56            Mixture Model ('gmm').
 57
 58        type_scaling: a tuple of 3 strings.
 59            scaling methods for inputs, hidden layer, and clustering respectively
 60            (and when relevant).
 61            Currently available: standardization ('std') or MinMax scaling ('minmax').
 62
 63        lags: int.
 64            number of lags used for each time series.
 65
 66        type_pi: str.
 67            type of prediction interval; currently:
 68            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
 69            - "kde": based on Kernel Density Estimation of in-sample residuals
 70            - "bootstrap": based on independent bootstrap of in-sample residuals
 71            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
 72            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
 73            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
 74            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
 75            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
 76            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
 77            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
 78
 79        block_size: int.
 80            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 81            Default is round(3.15*(n_residuals^1/3))
 82
 83        replications: int.
 84            number of replications (if needed, for predictive simulation). Default is 'None'.
 85
 86        kernel: str.
 87            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 88
 89        agg: str.
 90            either "mean" or "median" for simulation of bootstrap aggregating
 91
 92        seed: int.
 93            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 94
 95        backend: str.
 96            "cpu" or "gpu" or "tpu".
 97
 98        verbose: int.
 99            0: not printing; 1: printing
100
101        show_progress: bool.
102            True: progress bar when fitting each series; False: no progress bar when fitting each series
103
104    Attributes:
105
106        fit_objs_: dict
107            objects adjusted to each individual time series
108
109        y_: {array-like}
110            DeepMTS responses (most recent observations first)
111
112        X_: {array-like}
113            DeepMTS lags
114
115        xreg_: {array-like}
116            external regressors
117
118        y_means_: dict
119            a dictionary of each series mean values
120
121        preds_: {array-like}
122            successive model predictions
123
124        preds_std_: {array-like}
125            standard deviation around the predictions
126
127        return_std_: boolean
128            return uncertainty or not (set in predict)
129
130        df_: data frame
131            the input data frame, in case a data.frame is provided to `fit`
132
133    Examples:
134
135    Example 1:
136
137        ```python
138        import nnetsauce as ns
139        import numpy as np
140        from sklearn import linear_model
141        np.random.seed(123)
142
143        M = np.random.rand(10, 3)
144        M[:,0] = 10*M[:,0]
145        M[:,2] = 25*M[:,2]
146        print(M)
147
148        # Adjust Bayesian Ridge
149        regr4 = linear_model.BayesianRidge()
150        obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
151        obj_DeepMTS.fit(M)
152        print(obj_DeepMTS.predict())
153
154        # with credible intervals
155        print(obj_DeepMTS.predict(return_std=True, level=80))
156
157        print(obj_DeepMTS.predict(return_std=True, level=95))
158        ```
159
160    Example 2:
161
162        ```python
163        import nnetsauce as ns
164        import numpy as np
165        from sklearn import linear_model
166
167        dataset = {
168        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
169        'series1' : [34, 30, 35.6, 33.3, 38.1],
170        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
171        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
172        df = pd.DataFrame(dataset).set_index('date')
173        print(df)
174
175        # Adjust Bayesian Ridge
176        regr5 = linear_model.BayesianRidge()
177        obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
178        obj_DeepMTS.fit(df)
179        print(obj_DeepMTS.predict())
180
181        # with credible intervals
182        print(obj_DeepMTS.predict(return_std=True, level=80))
183
184        print(obj_DeepMTS.predict(return_std=True, level=95))
185        ```
186
187    """
188
189    # construct the object -----
190
191    def __init__(
192        self,
193        obj,
194        n_layers=3,
195        n_hidden_features=5,
196        activation_name="relu",
197        a=0.01,
198        nodes_sim="sobol",
199        bias=True,
200        dropout=0,
201        direct_link=True,
202        n_clusters=2,
203        cluster_encode=True,
204        type_clust="kmeans",
205        type_scaling=("std", "std", "std"),
206        lags=1,
207        type_pi="kde",
208        block_size=None,
209        replications=None,
210        kernel=None,
211        agg="mean",
212        seed=123,
213        backend="cpu",
214        verbose=0,
215        show_progress=True,
216    ):
217        assert int(lags) == lags, "parameter 'lags' should be an integer"
218        assert n_layers >= 1, "must have n_layers >= 1"
219        self.n_layers = int(n_layers)
220
221        if self.n_layers > 1:
222            for _ in range(self.n_layers - 1):
223                obj = CustomRegressor(
224                    obj=deepcopy(obj),
225                    n_hidden_features=n_hidden_features,
226                    activation_name=activation_name,
227                    a=a,
228                    nodes_sim=nodes_sim,
229                    bias=bias,
230                    dropout=dropout,
231                    direct_link=direct_link,
232                    n_clusters=n_clusters,
233                    cluster_encode=cluster_encode,
234                    type_clust=type_clust,
235                    type_scaling=type_scaling,
236                    seed=seed,
237                    backend=backend,
238                )
239
240        self.obj = deepcopy(obj)
241        super().__init__(
242            obj=self.obj,
243            n_hidden_features=n_hidden_features,
244            activation_name=activation_name,
245            a=a,
246            nodes_sim=nodes_sim,
247            bias=bias,
248            dropout=dropout,
249            direct_link=direct_link,
250            n_clusters=n_clusters,
251            cluster_encode=cluster_encode,
252            type_clust=type_clust,
253            type_scaling=type_scaling,
254            lags=lags,
255            type_pi=type_pi,
256            block_size=block_size,
257            replications=replications,
258            kernel=kernel,
259            agg=agg,
260            seed=seed,
261            backend=backend,
262            verbose=verbose,
263            show_progress=show_progress,
264        )

Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_layers: int.
    number of layers in the neural network.

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int.
    number of lags used for each time series.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is round(3.15*(n_residuals^(1/3)))

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    DeepMTS responses (most recent observations first)

X_: {array-like}
    DeepMTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))

print(obj_DeepMTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())

# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))

print(obj_DeepMTS.predict(return_std=True, level=95))
class DiscreteTokenMTS(nnetsauce.MTS):
 12class DiscreteTokenMTS(MTS):
 13    """
 14    MTS for discrete token forecasting via nearest-neighbor in embedding space.
 15
 16    Maps continuous predictions to discrete tokens using nearest-neighbor lookup
 17    in a vocabulary (embedding space). Supports probabilistic decoding with
 18    temperature-controlled softmax and uncertainty quantification in token space.
 19
 20    Parameters
 21    ----------
 22    obj : object
 23        Base learner with fit() and predict() methods
 24
 25    vocab : np.ndarray of shape (vocab_size, n_series)
 26        Token vocabulary - each row is a token embedding vector
 27
 28    metric : {'euclidean', 'cosine'}, default='euclidean'
 29        Distance metric for nearest-neighbor lookup
 30
 31    return_mode : {'token_id', 'token_vector', 'both', 'probs'}, default='token_id'
 32        Output format:
 33        - 'token_id': integer token indices
 34        - 'token_vector': token embedding vectors
 35        - 'both': single DataFrame with token_id + dimensions
 36        - 'probs': probability distribution over all tokens
 37
 38    softmax_temperature : float, default=1.0
 39        Temperature for softmax when return_mode='probs'
 40        Lower values (0.1-0.5) → sharper distributions (more deterministic)
 41        Higher values (2.0-10.0) → smoother distributions (more exploratory)
 42
 43    normalize_vocab : bool, default=False
 44        Whether to center and scale vocabulary to zero mean, unit variance
 45
 46    **mts_kwargs : dict
 47        Additional parameters passed to MTS base class
 48
 49    Attributes
 50    ----------
 51    vocab : np.ndarray
 52        Normalized vocabulary (if normalize_vocab=True)
 53
 54    vocab_mean_ : np.ndarray
 55        Mean used for normalization (if normalize_vocab=True)
 56
 57    vocab_std_ : np.ndarray
 58        Std used for normalization (if normalize_vocab=True)
 59
 60    discretization_errors_ : pd.DataFrame or None
 61        Distances from predictions to nearest tokens
 62
 63    Warnings
 64    --------
 65    - Prediction intervals (lower/upper) are NOT discretized - only the mean
 66    - For uncertainty in token space, use predict_token_distribution()
 67    - Vocabulary quality strongly affects results - use diagnose_vocabulary()
 68
 69    Examples
 70    --------
 71    >>> # Basic token prediction
 72    >>> vocab = np.random.randn(100, 10)  # 100 tokens, 10 dimensions
 73    >>> model = DiscreteTokenMTS(
 74    ...     obj=Ridge(),
 75    ...     vocab=vocab,
 76    ...     lags=5,
 77    ...     return_mode='token_id'
 78    ... )
 79    >>> model.fit(X_train)
 80    >>> tokens = model.predict(h=10)
 81
 82    >>> # Probabilistic with temperature control
 83    >>> model = DiscreteTokenMTS(
 84    ...     obj=Ridge(),
 85    ...     vocab=vocab,
 86    ...     lags=5,
 87    ...     return_mode='probs',
 88    ...     softmax_temperature=1.5
 89    ... )
 90    >>> probs = model.predict(h=10)  # Returns probability distributions
 91
 92    >>> # Uncertainty-aware token distributions
 93    >>> freqs, entropy, mode = model.predict_token_distribution(
 94    ...     h=10,
 95    ...     replications=100
 96    ... )
 97    """
 98
 99    def __init__(
100        self,
101        obj,
102        vocab,
103        metric="euclidean",
104        return_mode="token_id",
105        softmax_temperature=1.0,
106        normalize_vocab=False,
107        **mts_kwargs,
108    ):
109        super().__init__(obj, **mts_kwargs)
110
111        # Convert and validate vocabulary
112        self.vocab_original = np.asarray(vocab, dtype=np.float64)
113        self._validate_vocabulary()
114
115        self.vocab_size = self.vocab_original.shape[0]
116        self.vocab_mean_ = None
117        self.vocab_std_ = None
118        self.normalize_vocab = normalize_vocab
119
120        # Normalize if requested
121        if normalize_vocab:
122            self._normalize_vocabulary()
123        else:
124            self.vocab = self.vocab_original.copy()
125
126        # Validate and set metric
127        assert metric in [
128            "euclidean",
129            "cosine",
130        ], "metric must be 'euclidean' or 'cosine'"
131        self.metric = metric
132        self.distance_func = (
133            euclidean_distances if metric == "euclidean" else cosine_distances
134        )
135
136        # Validate and set return mode
137        assert return_mode in [
138            "token_id",
139            "token_vector",
140            "both",
141            "probs",
142        ], "return_mode must be 'token_id', 'token_vector', 'both', or 'probs'"
143        self.return_mode = return_mode
144
145        # Validate temperature
146        assert softmax_temperature > 0, "softmax_temperature must be positive"
147        self.softmax_temperature = softmax_temperature
148
149        # Initialize error tracking
150        self.discretization_errors_ = None
151
152    def _validate_vocabulary(self):
153        """Comprehensive vocabulary validation"""
154        # Check shape
155        assert (
156            self.vocab_original.ndim == 2
157        ), "vocab must be 2D array (vocab_size, n_series)"
158        assert (
159            self.vocab_original.shape[0] > 0
160        ), "vocab must have at least one token"
161
162        # Check for NaN/Inf
163        if np.any(np.isnan(self.vocab_original)) or np.any(
164            np.isinf(self.vocab_original)
165        ):
166            raise ValueError("Vocabulary contains NaN or Inf values")
167
168        # Check for duplicates
169        unique_rows = np.unique(self.vocab_original, axis=0)
170        if len(unique_rows) < len(self.vocab_original):
171            n_duplicates = len(self.vocab_original) - len(unique_rows)
172            warnings.warn(
173                f"Vocabulary contains {n_duplicates} duplicate vectors. "
174                "This reduces effective vocabulary size.",
175                UserWarning,
176            )
177
178        # Check for near-duplicates
179        if len(self.vocab_original) > 1:
180            dists = euclidean_distances(self.vocab_original)
181            np.fill_diagonal(dists, np.inf)
182            min_dist = dists.min()
183
184            if min_dist < 1e-6:
185                warnings.warn(
186                    f"Vocabulary contains very close vectors (min distance: {min_dist:.2e}). "
187                    "Consider increasing token diversity.",
188                    UserWarning,
189                )
190
191    def _normalize_vocabulary(self):
192        """Center and scale vocabulary"""
193        self.vocab_mean_ = self.vocab_original.mean(axis=0)
194        self.vocab_std_ = self.vocab_original.std(axis=0) + 1e-8
195        self.vocab = (self.vocab_original - self.vocab_mean_) / self.vocab_std_
196
197    def fit(self, X, **kwargs):
198        """
199        Fit model and validate vocabulary dimensions match data.
200
201        Parameters
202        ----------
203        X : array-like of shape (n_samples, n_series)
204            Training data
205
206        **kwargs : dict
207            Additional parameters passed to parent fit
208
209        Returns
210        -------
211        self : object
212            Fitted estimator
213        """
214        # Call parent fit
215        super().fit(X, **kwargs)
216
217        # Validate vocabulary dimensions
218        n_series = X.shape[1] if X.ndim > 1 else 1
219        if self.vocab.shape[1] != n_series:
220            raise ValueError(
221                f"Vocabulary dimension ({self.vocab.shape[1]}) must match "
222                f"number of series ({n_series})"
223            )
224
225        # Additional check for cosine distance
226        if self.metric == "cosine":
227            norms = np.linalg.norm(self.vocab, axis=1)
228            zero_vectors = norms < 1e-10
229            if np.any(zero_vectors):
230                raise ValueError(
231                    f"Vocabulary contains {zero_vectors.sum()} zero/near-zero vectors. "
232                    "Cosine distance requires non-zero vectors."
233                )
234
235        return self
236
237    def _vectorized_map_to_tokens(self, continuous_preds):
238        """
239        Vectorized token mapping for efficiency.
240
241        Parameters
242        ----------
243        continuous_preds : np.ndarray of shape (h, n_series)
244            Continuous predictions
245
246        Returns
247        -------
248        result : depends on return_mode
249        errors : np.ndarray
250            Distances to nearest tokens
251        """
252        # Normalize predictions if vocabulary was normalized
253        if self.normalize_vocab:
254            continuous_preds = (
255                continuous_preds - self.vocab_mean_
256            ) / self.vocab_std_
257
258        # Compute all distances at once
259        dists = self.distance_func(continuous_preds, self.vocab)
260
261        # Find nearest tokens
262        nearest_indices = np.argmin(dists, axis=1)
263        min_dists = dists[np.arange(len(dists)), nearest_indices]
264
265        if self.return_mode == "token_id":
266            return nearest_indices, min_dists
267
268        elif self.return_mode == "token_vector":
269            token_vecs = self.vocab[nearest_indices]
270            # Denormalize if vocabulary was normalized
271            if self.normalize_vocab:
272                token_vecs = token_vecs * self.vocab_std_ + self.vocab_mean_
273            return token_vecs, min_dists
274
275        elif self.return_mode == "both":
276            # Return combined array: [token_id, dim_0, dim_1, ...]
277            token_ids = nearest_indices.reshape(-1, 1)
278            token_vecs = self.vocab[nearest_indices]
279            # Denormalize if vocabulary was normalized
280            if self.normalize_vocab:
281                token_vecs = token_vecs * self.vocab_std_ + self.vocab_mean_
282            combined = np.column_stack([token_ids, token_vecs])
283            return combined, min_dists
284
285        elif self.return_mode == "probs":
286            # Softmax of negative distances
287            probs = softmax(-dists / self.softmax_temperature, axis=1)
288            return probs, min_dists
289
290    def predict(
291        self,
292        h=5,
293        level=95,
294        quantiles=None,
295        return_discretization_error=False,
296        **kwargs,
297    ):
298        """
299        Generate discrete token predictions.
300
301        Parameters
302        ----------
303        h : int, default=5
304            Forecast horizon
305
306        level : int, default=95
307            Confidence level (only affects continuous forecasts)
308
309        quantiles : list of float, optional
310            Quantile levels
311
312        return_discretization_error : bool, default=False
313            If True, return (predictions, errors) tuple
314
315        **kwargs : dict
316            Additional parameters for parent predict
317
318        Returns
319        -------
320        predictions : pd.DataFrame
321            Discrete predictions. Format depends on return_mode:
322            - 'token_id': single column 'token_id'
323            - 'token_vector': columns 'dim_0', 'dim_1', ...
324            - 'both': columns 'token_id', 'dim_0', 'dim_1', ...
325            - 'probs': columns 'token_0_prob', 'token_1_prob', ...
326
327        errors : pd.DataFrame (if return_discretization_error=True)
328            Discretization errors (distances to nearest tokens)
329
330        Warnings
331        --------
332        When prediction intervals are requested but only mean is discretized,
333        a warning is issued. Use predict_token_distribution() for uncertainty
334        in token space.
335        """
336        # Get continuous predictions from parent
337        continuous_result = super().predict(
338            h=h, level=level, quantiles=quantiles, **kwargs
339        )
340
341        # FIXED: Robust type detection using duck typing
342        if hasattr(continuous_result, "_fields"):  # Namedtuple
343            if (
344                hasattr(continuous_result, "sims")
345                and continuous_result.sims is not None
346            ):
347                # Simulation-based forecast
348                return self._discretize_simulations(
349                    continuous_result.sims, return_discretization_error
350                )
351            elif hasattr(continuous_result, "mean"):
352                # Interval-based forecast - warn about information loss
353                warnings.warn(
354                    "Prediction intervals cannot be meaningfully discretized. "
355                    "Only mean predictions are converted to tokens. "
356                    "Use predict_token_distribution(replications=N) for "
357                    "uncertainty in token space.",
358                    UserWarning,
359                )
360                return self._discretize_dataframe(
361                    continuous_result.mean, return_discretization_error
362                )
363        elif isinstance(continuous_result, pd.DataFrame):
364            # Deterministic forecast
365            return self._discretize_dataframe(
366                continuous_result, return_discretization_error
367            )
368        else:
369            raise NotImplementedError(
370                f"Unhandled predict output type: {type(continuous_result)}"
371            )
372
373    def _discretize_dataframe(self, df, return_error=False):
374        """Discretize a continuous prediction DataFrame"""
375        # Use vectorized mapping
376        result, errors = self._vectorized_map_to_tokens(df.values)
377
378        # FIXED: Always return single DataFrame (even for 'both' mode)
379        if self.return_mode == "probs":
380            result_df = pd.DataFrame(
381                result,
382                index=df.index,
383                columns=[f"token_{i}_prob" for i in range(self.vocab_size)],
384            )
385        elif self.return_mode == "both":
386            # Combined format: token_id + dimensions
387            columns = ["token_id"] + [
388                f"dim_{i}" for i in range(self.vocab.shape[1])
389            ]
390            result_df = pd.DataFrame(result, index=df.index, columns=columns)
391            result_df["token_id"] = result_df["token_id"].astype(int)
392        elif self.return_mode == "token_id":
393            result_df = pd.DataFrame(
394                result.reshape(-1, 1), index=df.index, columns=["token_id"]
395            )
396        else:  # 'token_vector'
397            result_df = pd.DataFrame(
398                result,
399                index=df.index,
400                columns=[f"dim_{i}" for i in range(self.vocab.shape[1])],
401            )
402
403        if return_error:
404            error_df = pd.DataFrame(
405                errors.reshape(-1, 1),
406                index=df.index,
407                columns=["discretization_error"],
408            )
409            self.discretization_errors_ = error_df
410            return result_df, error_df
411
412        return result_df
413
414    def _discretize_simulations(self, sims, return_error=False):
415        """Discretize simulation paths"""
416        discrete_sims = []
417        all_errors = []
418
419        for sim_df in sims:
420            result, errors = self._vectorized_map_to_tokens(sim_df.values)
421
422            if self.return_mode == "probs":
423                discrete_df = pd.DataFrame(
424                    result,
425                    index=sim_df.index,
426                    columns=[f"token_{i}_prob" for i in range(self.vocab_size)],
427                )
428            elif self.return_mode == "both":
429                columns = ["token_id"] + [
430                    f"dim_{i}" for i in range(self.vocab.shape[1])
431                ]
432                discrete_df = pd.DataFrame(
433                    result, index=sim_df.index, columns=columns
434                )
435                discrete_df["token_id"] = discrete_df["token_id"].astype(int)
436            elif self.return_mode == "token_id":
437                discrete_df = pd.DataFrame(
438                    result.reshape(-1, 1),
439                    index=sim_df.index,
440                    columns=["token_id"],
441                )
442            else:  # 'token_vector'
443                discrete_df = pd.DataFrame(
444                    result,
445                    index=sim_df.index,
446                    columns=[f"dim_{i}" for i in range(self.vocab.shape[1])],
447                )
448
449            discrete_sims.append(discrete_df)
450
451            if return_error:
452                error_df = pd.DataFrame(
453                    errors.reshape(-1, 1),
454                    index=sim_df.index,
455                    columns=["discretization_error"],
456                )
457                all_errors.append(error_df)
458
459        if return_error:
460            return tuple(discrete_sims), tuple(all_errors)
461        return tuple(discrete_sims)
462
463    # ========== NEW: Uncertainty Quantification in Token Space ==========
464
465    def predict_top_k(self, h=5, k=5, **kwargs):
466        """
467        Predict top-k most probable tokens per timestep.
468
469        Parameters
470        ----------
471        h : int
472            Forecast horizon
473        k : int
474            Number of top tokens to return
475        **kwargs : dict
476            Additional parameters for parent predict
477
478        Returns
479        -------
480        predictions : pd.DataFrame
481            Columns: token_1, prob_1, token_2, prob_2, ..., token_k, prob_k
482        """
483        continuous_result = super().predict(h=h, **kwargs)
484
485        # Handle different return types
486        if hasattr(continuous_result, "mean"):
487            preds = continuous_result.mean.values
488            index = continuous_result.mean.index
489        elif isinstance(continuous_result, pd.DataFrame):
490            preds = continuous_result.values
491            index = continuous_result.index
492        else:
493            raise ValueError("Cannot extract continuous predictions")
494
495        # Compute probabilities
496        dists = self.distance_func(preds, self.vocab)
497        probs = softmax(-dists / self.softmax_temperature, axis=1)
498
499        # Get top-k
500        top_k_indices = np.argsort(probs, axis=1)[:, -k:][:, ::-1]
501        top_k_probs = np.take_along_axis(probs, top_k_indices, axis=1)
502
503        # Format as DataFrame
504        columns = []
505        data = []
506        for i in range(k):
507            columns.extend([f"token_{i+1}", f"prob_{i+1}"])
508            data.append(top_k_indices[:, i])
509            data.append(top_k_probs[:, i])
510
511        return pd.DataFrame(np.column_stack(data), index=index, columns=columns)
512
513    def predict_token_distribution(self, h=5, replications=100, **kwargs):
514        """
515        Generate token probability distribution from simulation ensemble.
516
517        This method provides meaningful uncertainty quantification in token space
518        by discretizing multiple simulation paths and computing token frequencies.
519
520        Parameters
521        ----------
522        h : int
523            Forecast horizon
524        replications : int
525            Number of simulation paths
526        **kwargs : dict
527            Additional parameters for parent predict
528
529        Returns
530        -------
531        frequencies : pd.DataFrame
532            Token frequencies across simulations
533            Columns: token_0_freq, token_1_freq, ..., token_V_freq
534
535        entropy : pd.Series
536            Shannon entropy per timestep (uncertainty measure)
537
538        mode_tokens : pd.DataFrame
539            Most frequent token per timestep
540
541        Examples
542        --------
543        >>> freqs, entropy, mode = model.predict_token_distribution(h=10, replications=100)
544        >>> # High entropy → uncertain prediction
545        >>> uncertain_steps = entropy[entropy > 2.0]
546        >>> # Use mode tokens for point predictions
547        >>> predictions = mode['mode_token'].values
548        """
549        # Force simulation mode
550        kwargs["replications"] = replications
551        continuous_result = super().predict(h=h, **kwargs)
552
553        # Extract simulations
554        if (
555            hasattr(continuous_result, "sims")
556            and continuous_result.sims is not None
557        ):
558            sims = continuous_result.sims
559            index = continuous_result.mean.index
560        else:
561            raise ValueError(
562                "predict_token_distribution requires simulation-based forecasting. "
563                "Ensure replications > 0 and type_pi supports simulations."
564            )
565
566        # Discretize all paths
567        all_tokens = []
568        for sim in sims:
569            tokens, _ = self._vectorized_map_to_tokens(sim.values)
570            if self.return_mode == "probs":
571                # For probs mode, get argmax token
572                tokens = np.argmax(tokens, axis=1)
573            elif self.return_mode == "both":
574                # Extract token_id column
575                tokens = tokens[:, 0].astype(int)
576            elif self.return_mode == "token_vector":
577                # Map back to token IDs
578                dists = self.distance_func(tokens, self.vocab)
579                tokens = np.argmin(dists, axis=1)
580            # else: token_id mode, already correct
581
582            all_tokens.append(tokens)
583
584        all_tokens = np.array(all_tokens)  # (replications, h)
585
586        # Compute frequency distribution
587        h_actual = all_tokens.shape[1]
588        token_freqs = np.zeros((h_actual, self.vocab_size))
589
590        for t in range(h_actual):
591            unique, counts = np.unique(all_tokens[:, t], return_counts=True)
592            token_freqs[t, unique] = counts / replications
593
594        # Compute entropy
595        epsilon = 1e-10
596        entropy = -np.sum(token_freqs * np.log(token_freqs + epsilon), axis=1)
597
598        # Get mode
599        mode_tokens = np.argmax(token_freqs, axis=1)
600
601        # Package results
602        freq_df = pd.DataFrame(
603            token_freqs,
604            index=index,
605            columns=[f"token_{i}_freq" for i in range(self.vocab_size)],
606        )
607
608        entropy_series = pd.Series(entropy, index=index, name="entropy")
609
610        mode_df = pd.DataFrame(mode_tokens, index=index, columns=["mode_token"])
611
612        return freq_df, entropy_series, mode_df
613
614    # ========== Utility Methods ==========
615
616    def tokens_to_vectors(self, token_ids):
617        """Convert token IDs to embedding vectors (in original scale)"""
618        token_ids = np.asarray(token_ids).astype(int)
619        assert np.all(
620            (token_ids >= 0) & (token_ids < self.vocab_size)
621        ), f"Token IDs must be in range [0, {self.vocab_size-1}]"
622        vectors = self.vocab[token_ids]
623        # Denormalize if vocabulary was normalized
624        if self.normalize_vocab:
625            vectors = vectors * self.vocab_std_ + self.vocab_mean_
626        return vectors
627
628    def get_token_neighbors(self, token_id, k=5):
629        """Find k nearest neighbors of a token"""
630        assert (
631            0 <= token_id < self.vocab_size
632        ), f"token_id must be in range [0, {self.vocab_size-1}]"
633
634        token_vec = self.vocab[token_id].reshape(1, -1)
635        dists = self.distance_func(token_vec, self.vocab).flatten()
636
637        sorted_indices = np.argsort(dists)
638        sorted_indices = sorted_indices[sorted_indices != token_id][:k]
639
640        return pd.DataFrame(
641            {"neighbor_id": sorted_indices, "distance": dists[sorted_indices]}
642        )
643
644    def compute_vocab_coverage(self, predictions):
645        """Compute vocabulary usage statistics"""
646        if "token_id" not in predictions.columns:
647            raise ValueError("predictions must have 'token_id' column")
648
649        token_ids = predictions["token_id"].values
650        unique_tokens = np.unique(token_ids)
651        freq = pd.Series(token_ids).value_counts().sort_index()
652
653        return {
654            "unique_tokens": len(unique_tokens),
655            "coverage_pct": 100 * len(unique_tokens) / self.vocab_size,
656            "token_frequencies": freq,
657            "most_common_token": freq.idxmax() if len(freq) > 0 else None,
658            "least_common_token": freq.idxmin() if len(freq) > 0 else None,
659        }
660
661    def diagnose_vocabulary(self):
662        """
663        Comprehensive vocabulary quality diagnostics.
664
665        Returns
666        -------
667        report : dict
668            Quality metrics including distances, condition number, coverage
669        """
670        # Use original vocabulary for diagnostics to get meaningful statistics
671        vocab_to_diagnose = self.vocab_original
672
673        report = {
674            "vocab_size": self.vocab_size,
675            "embedding_dim": vocab_to_diagnose.shape[1],
676            "normalized": self.normalize_vocab,
677        }
678
679        # Pairwise distances
680        dists = euclidean_distances(vocab_to_diagnose)
681        np.fill_diagonal(dists, np.inf)
682
683        report["min_pairwise_distance"] = dists.min()
684        report["max_pairwise_distance"] = dists.max()
685        report["mean_pairwise_distance"] = dists[dists != np.inf].mean()
686
687        # Condition number
688        U, s, Vt = np.linalg.svd(vocab_to_diagnose, full_matrices=False)
689        report["condition_number"] = s.max() / (s.min() + 1e-10)
690
691        # Coverage volume
692        ranges = vocab_to_diagnose.max(axis=0) - vocab_to_diagnose.min(axis=0)
693        report["coverage_volume"] = np.prod(ranges)
694
695        # Duplicates
696        unique_rows = np.unique(vocab_to_diagnose, axis=0)
697        report["duplicate_count"] = len(vocab_to_diagnose) - len(unique_rows)
698
699        return report
700
701    def print_vocabulary_report(self):
702        """Print human-readable vocabulary diagnostics"""
703        report = self.diagnose_vocabulary()
704
705        print("=" * 60)
706        print("VOCABULARY QUALITY REPORT")
707        print("=" * 60)
708        print(f"Vocabulary size: {report['vocab_size']} tokens")
709        print(f"Embedding dimension: {report['embedding_dim']}")
710        print(f"\nPairwise Distances:")
711        print(f"  Min:  {report['min_pairwise_distance']:.6f}")
712        print(f"  Mean: {report['mean_pairwise_distance']:.6f}")
713        print(f"  Max:  {report['max_pairwise_distance']:.6f}")
714        print(f"\nVocabulary Health:")
715        print(f"  Condition number: {report['condition_number']:.2f}")
716        if report["condition_number"] > 1000:
717            print(
718                "  ⚠️  WARNING: High condition number may indicate redundant tokens"
719            )
720        print(f"  Duplicate tokens: {report['duplicate_count']}")
721        if report["duplicate_count"] > 0:
722            print("  ⚠️  WARNING: Duplicates reduce effective vocabulary size")
723        print(f"  Coverage volume: {report['coverage_volume']:.2e}")
724        print("=" * 60)

MTS for discrete token forecasting via nearest-neighbor in embedding space.

Maps continuous predictions to discrete tokens using nearest-neighbor lookup in a vocabulary (embedding space). Supports probabilistic decoding with temperature-controlled softmax and uncertainty quantification in token space.

Parameters

obj : object Base learner with fit() and predict() methods

vocab : np.ndarray of shape (vocab_size, n_series) Token vocabulary - each row is a token embedding vector

metric : {'euclidean', 'cosine'}, default='euclidean' Distance metric for nearest-neighbor lookup

return_mode : {'token_id', 'token_vector', 'both', 'probs'}, default='token_id' Output format: - 'token_id': integer token indices - 'token_vector': token embedding vectors - 'both': single DataFrame with token_id + dimensions - 'probs': probability distribution over all tokens

softmax_temperature : float, default=1.0 Temperature for softmax when return_mode='probs' Lower values (0.1-0.5) → sharper distributions (more deterministic) Higher values (2.0-10.0) → smoother distributions (more exploratory)

normalize_vocab : bool, default=False Whether to center and scale vocabulary to zero mean, unit variance

**mts_kwargs : dict Additional parameters passed to MTS base class

Attributes

vocab : np.ndarray Normalized vocabulary (if normalize_vocab=True)

vocab_mean_ : np.ndarray Mean used for normalization (if normalize_vocab=True)

vocab_std_ : np.ndarray Std used for normalization (if normalize_vocab=True)

discretization_errors_ : pd.DataFrame or None Distances from predictions to nearest tokens

Warnings

  • Prediction intervals (lower/upper) are NOT discretized - only the mean
  • For uncertainty in token space, use predict_token_distribution()
  • Vocabulary quality strongly affects results - use diagnose_vocabulary()

Examples

>>> # Basic token prediction
>>> vocab = np.random.randn(100, 10)  # 100 tokens, 10 dimensions
>>> model = DiscreteTokenMTS(
...     obj=Ridge(),
...     vocab=vocab,
...     lags=5,
...     return_mode='token_id'
... )
>>> model.fit(X_train)
>>> tokens = model.predict(h=10)
>>> # Probabilistic with temperature control
>>> model = DiscreteTokenMTS(
...     obj=Ridge(),
...     vocab=vocab,
...     lags=5,
...     return_mode='probs',
...     softmax_temperature=1.5
... )
>>> probs = model.predict(h=10)  # Returns probability distributions
>>> # Uncertainty-aware token distributions
>>> freqs, entropy, mode = model.predict_token_distribution(
...     h=10,
...     replications=100
... )
def fit(self, X, **kwargs):
197    def fit(self, X, **kwargs):
198        """
199        Fit model and validate vocabulary dimensions match data.
200
201        Parameters
202        ----------
203        X : array-like of shape (n_samples, n_series)
204            Training data
205
206        **kwargs : dict
207            Additional parameters passed to parent fit
208
209        Returns
210        -------
211        self : object
212            Fitted estimator
213        """
214        # Call parent fit
215        super().fit(X, **kwargs)
216
217        # Validate vocabulary dimensions
218        n_series = X.shape[1] if X.ndim > 1 else 1
219        if self.vocab.shape[1] != n_series:
220            raise ValueError(
221                f"Vocabulary dimension ({self.vocab.shape[1]}) must match "
222                f"number of series ({n_series})"
223            )
224
225        # Additional check for cosine distance
226        if self.metric == "cosine":
227            norms = np.linalg.norm(self.vocab, axis=1)
228            zero_vectors = norms < 1e-10
229            if np.any(zero_vectors):
230                raise ValueError(
231                    f"Vocabulary contains {zero_vectors.sum()} zero/near-zero vectors. "
232                    "Cosine distance requires non-zero vectors."
233                )
234
235        return self

Fit model and validate vocabulary dimensions match data.

Parameters

X : array-like of shape (n_samples, n_series) Training data

**kwargs : dict Additional parameters passed to parent fit

Returns

self : object Fitted estimator

def predict( self, h=5, level=95, quantiles=None, return_discretization_error=False, **kwargs):
290    def predict(
291        self,
292        h=5,
293        level=95,
294        quantiles=None,
295        return_discretization_error=False,
296        **kwargs,
297    ):
298        """
299        Generate discrete token predictions.
300
301        Parameters
302        ----------
303        h : int, default=5
304            Forecast horizon
305
306        level : int, default=95
307            Confidence level (only affects continuous forecasts)
308
309        quantiles : list of float, optional
310            Quantile levels
311
312        return_discretization_error : bool, default=False
313            If True, return (predictions, errors) tuple
314
315        **kwargs : dict
316            Additional parameters for parent predict
317
318        Returns
319        -------
320        predictions : pd.DataFrame
321            Discrete predictions. Format depends on return_mode:
322            - 'token_id': single column 'token_id'
323            - 'token_vector': columns 'dim_0', 'dim_1', ...
324            - 'both': columns 'token_id', 'dim_0', 'dim_1', ...
325            - 'probs': columns 'token_0_prob', 'token_1_prob', ...
326
327        errors : pd.DataFrame (if return_discretization_error=True)
328            Discretization errors (distances to nearest tokens)
329
330        Warnings
331        --------
332        When prediction intervals are requested but only mean is discretized,
333        a warning is issued. Use predict_token_distribution() for uncertainty
334        in token space.
335        """
336        # Get continuous predictions from parent
337        continuous_result = super().predict(
338            h=h, level=level, quantiles=quantiles, **kwargs
339        )
340
341        # FIXED: Robust type detection using duck typing
342        if hasattr(continuous_result, "_fields"):  # Namedtuple
343            if (
344                hasattr(continuous_result, "sims")
345                and continuous_result.sims is not None
346            ):
347                # Simulation-based forecast
348                return self._discretize_simulations(
349                    continuous_result.sims, return_discretization_error
350                )
351            elif hasattr(continuous_result, "mean"):
352                # Interval-based forecast - warn about information loss
353                warnings.warn(
354                    "Prediction intervals cannot be meaningfully discretized. "
355                    "Only mean predictions are converted to tokens. "
356                    "Use predict_token_distribution(replications=N) for "
357                    "uncertainty in token space.",
358                    UserWarning,
359                )
360                return self._discretize_dataframe(
361                    continuous_result.mean, return_discretization_error
362                )
363        elif isinstance(continuous_result, pd.DataFrame):
364            # Deterministic forecast
365            return self._discretize_dataframe(
366                continuous_result, return_discretization_error
367            )
368        else:
369            raise NotImplementedError(
370                f"Unhandled predict output type: {type(continuous_result)}"
371            )

Generate discrete token predictions.

Parameters

h : int, default=5 Forecast horizon

level : int, default=95 Confidence level (only affects continuous forecasts)

quantiles : list of float, optional Quantile levels

return_discretization_error : bool, default=False If True, return (predictions, errors) tuple

**kwargs : dict Additional parameters for parent predict

Returns

predictions : pd.DataFrame Discrete predictions. Format depends on return_mode: - 'token_id': single column 'token_id' - 'token_vector': columns 'dim_0', 'dim_1', ... - 'both': columns 'token_id', 'dim_0', 'dim_1', ... - 'probs': columns 'token_0_prob', 'token_1_prob', ...

errors : pd.DataFrame (if return_discretization_error=True) Discretization errors (distances to nearest tokens)

Warnings

When prediction intervals are requested but only mean is discretized, a warning is issued. Use predict_token_distribution() for uncertainty in token space.

class Downloader:
 6class Downloader:
 7    """Download datasets from data sources (R-universe for now)"""
 8
 9    def __init__(self):
10        self.pkgname = None
11        self.dataset = None
12        self.source = None
13        self.url = None
14        self.request = None
15
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

def download( self, pkgname='MASS', dataset='Boston', source='https://cran.r-universe.dev/', **kwargs):
16    def download(
17        self,
18        pkgname="MASS",
19        dataset="Boston",
20        source="https://cran.r-universe.dev/",
21        **kwargs
22    ):
23        """Download datasets from data sources (R-universe for now)
24
25        Examples:
26
27        ```python
28        import nnetsauce as ns
29
30        downloader = ns.Downloader()
31        df = downloader.download(pkgname="MASS", dataset="Boston")
32        ```
33
34        """
35        self.pkgname = pkgname
36        self.dataset = dataset
37        self.source = source
38        self.url = source + pkgname + "/data/" + dataset + "/json"
39        self.request = requests.get(self.url)
40        return pd.DataFrame(self.request.json(), **kwargs)

Download datasets from data sources (R-universe for now)

Examples:

import nnetsauce as ns

downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
class ElasticNet2Regressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 7class ElasticNet2Regressor(BaseEstimator, RegressorMixin):
 8    def __init__(
 9        self,
10        n_hidden_features=100,
11        alpha=1.0,
12        l1_ratio=0.5,
13        lambd=0.1,
14        activation_name="tanh",
15        a=0.01,
16        max_iter=1000,
17        tol=1e-4,
18        random_state=None,
19    ):
20        self.n_hidden_features = n_hidden_features
21        self.alpha = alpha
22        self.l1_ratio = l1_ratio
23        self.lambd = lambd
24        self.activation_name = activation_name
25        self.a = a
26        self.max_iter = max_iter
27        self.tol = tol
28        self.random_state = random_state
29
30    def _activation(self, Z):
31        if self.activation_name == "relu":
32            return np.maximum(0, Z)
33        elif self.activation_name == "tanh":
34            return np.tanh(Z)
35        elif self.activation_name == "sigmoid":
36            return 1 / (1 + np.exp(-Z))
37        elif self.activation_name == "prelu":
38            return np.where(Z > 0, Z, self.a * Z)
39        elif self.activation_name == "elu":
40            return np.where(Z > 0, Z, self.a * (np.exp(Z) - 1))
41        else:
42            raise ValueError(f"Unknown activation: {self.activation_name}")
43
44    def fit(self, X, y):
45        X, y = check_X_y(X, y)
46        rng = np.random.RandomState(self.random_state)
47
48        # Standardize inputs
49        self.X_mean_ = X.mean(axis=0)
50        self.X_std_ = X.std(axis=0) + 1e-8
51        X_scaled = (X - self.X_mean_) / self.X_std_
52
53        # Center response
54        self.y_mean_ = y.mean()
55        y_centered = y - self.y_mean_
56
57        # Random feature mapping
58        self.W_in_ = rng.randn(X.shape[1], self.n_hidden_features)
59        self.b_in_ = rng.randn(self.n_hidden_features)
60        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
61
62        # Doubly-constrained optimization with Elastic Net
63        beta = np.zeros(self.n_hidden_features)
64
65        for _ in range(self.max_iter):
66            beta_old = beta.copy()
67
68            # Gradient descent step with projection
69            grad = H.T @ (H @ beta - y_centered) / len(y)
70            step = 0.01 / (1 + self.alpha * (1 - self.l1_ratio))
71
72            # Soft thresholding (L1)
73            beta = beta - step * grad
74            threshold = step * self.alpha * self.l1_ratio
75            beta = np.sign(beta) * np.maximum(np.abs(beta) - threshold, 0)
76
77            # L2 projection (constraint)
78            norm = np.linalg.norm(beta)
79            if norm > self.lambd:
80                beta = beta * (self.lambd / norm)
81
82            if np.linalg.norm(beta - beta_old) < self.tol:
83                break
84
85        self.beta_ = beta
86        return self
87
88    def predict(self, X):
89        X = check_array(X)
90        X_scaled = (X - self.X_mean_) / self.X_std_
91        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
92        return H @ self.beta_ + self.y_mean_

Base class for all estimators in scikit-learn.

Inheriting from this class provides default implementations of:

  • setting and getting parameters used by GridSearchCV and friends;
  • textual and HTML representation displayed in terminals and IDEs;
  • estimator serialization;
  • parameters validation;
  • data validation;
  • feature names validation.

Read more in the scikit-learn User Guide, section "Rolling your own estimator".

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

Examples

>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
def fit(self, X, y):
44    def fit(self, X, y):
45        X, y = check_X_y(X, y)
46        rng = np.random.RandomState(self.random_state)
47
48        # Standardize inputs
49        self.X_mean_ = X.mean(axis=0)
50        self.X_std_ = X.std(axis=0) + 1e-8
51        X_scaled = (X - self.X_mean_) / self.X_std_
52
53        # Center response
54        self.y_mean_ = y.mean()
55        y_centered = y - self.y_mean_
56
57        # Random feature mapping
58        self.W_in_ = rng.randn(X.shape[1], self.n_hidden_features)
59        self.b_in_ = rng.randn(self.n_hidden_features)
60        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
61
62        # Doubly-constrained optimization with Elastic Net
63        beta = np.zeros(self.n_hidden_features)
64
65        for _ in range(self.max_iter):
66            beta_old = beta.copy()
67
68            # Gradient descent step with projection
69            grad = H.T @ (H @ beta - y_centered) / len(y)
70            step = 0.01 / (1 + self.alpha * (1 - self.l1_ratio))
71
72            # Soft thresholding (L1)
73            beta = beta - step * grad
74            threshold = step * self.alpha * self.l1_ratio
75            beta = np.sign(beta) * np.maximum(np.abs(beta) - threshold, 0)
76
77            # L2 projection (constraint)
78            norm = np.linalg.norm(beta)
79            if norm > self.lambd:
80                beta = beta * (self.lambd / norm)
81
82            if np.linalg.norm(beta - beta_old) < self.tol:
83                break
84
85        self.beta_ = beta
86        return self
def predict(self, X):
88    def predict(self, X):
89        X = check_array(X)
90        X_scaled = (X - self.X_mean_) / self.X_std_
91        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
92        return H @ self.beta_ + self.y_mean_
class GLMClassifier(nnetsauce.glm.glm.GLM, sklearn.base.ClassifierMixin):
 23class GLMClassifier(GLM, ClassifierMixin):
 24    """Generalized 'linear' models using quasi-randomized networks (classification)
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        lambda1: float
 32            regularization parameter for GLM coefficients on original features
 33
 34        alpha1: float
 35            controls compromize between l1 and l2 norm of GLM coefficients on original features
 36
 37        lambda2: float
 38            regularization parameter for GLM coefficients on nonlinear features
 39
 40        alpha2: float
 41            controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features
 42
 43        activation_name: str
 44            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 45
 46        a: float
 47            hyperparameter for 'prelu' or 'elu' activation function
 48
 49        nodes_sim: str
 50            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 51            'uniform'
 52
 53        bias: boolean
 54            indicates if the hidden layer contains a bias term (True) or not
 55            (False)
 56
 57        dropout: float
 58            regularization parameter; (random) percentage of nodes dropped out
 59            of the training
 60
 61        direct_link: boolean
 62            indicates if the original predictors are included (True) in model's
 63            fitting or not (False)
 64
 65        n_clusters: int
 66            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 67                no clustering)
 68
 69        cluster_encode: bool
 70            defines how the variable containing clusters is treated (default is one-hot)
 71            if `False`, then labels are used, without one-hot encoding
 72
 73        type_clust: str
 74            type of clustering method: currently k-means ('kmeans') or Gaussian
 75            Mixture Model ('gmm')
 76
 77        type_scaling: a tuple of 3 strings
 78            scaling methods for inputs, hidden layer, and clustering respectively
 79            (and when relevant).
 80            Currently available: standardization ('std') or MinMax scaling ('minmax')
 81
 82        optimizer: object
 83            optimizer, from class nnetsauce.Optimizer
 84
 85        backend: str.
 86            "cpu" or "gpu" or "tpu".
 87
 88        seed: int
 89            reproducibility seed for nodes_sim=='uniform'
 90
 91    Attributes:
 92
 93        beta_: vector
 94            regression coefficients
 95
 96    Examples:
 97
 98    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)
 99
100    """
101
102    # construct the object -----
103    _estimator_type = "classifier"
104
105    def __init__(
106        self,
107        n_hidden_features=5,
108        lambda1=0.01,
109        alpha1=0.5,
110        lambda2=0.01,
111        alpha2=0.5,
112        family="expit",
113        activation_name="relu",
114        a=0.01,
115        nodes_sim="sobol",
116        bias=True,
117        dropout=0,
118        direct_link=True,
119        n_clusters=2,
120        cluster_encode=True,
121        type_clust="kmeans",
122        type_scaling=("std", "std", "std"),
123        optimizer=Optimizer(),
124        backend="cpu",
125        seed=123,
126    ):
127        super().__init__(
128            n_hidden_features=n_hidden_features,
129            lambda1=lambda1,
130            alpha1=alpha1,
131            lambda2=lambda2,
132            alpha2=alpha2,
133            activation_name=activation_name,
134            a=a,
135            nodes_sim=nodes_sim,
136            bias=bias,
137            dropout=dropout,
138            direct_link=direct_link,
139            n_clusters=n_clusters,
140            cluster_encode=cluster_encode,
141            type_clust=type_clust,
142            type_scaling=type_scaling,
143            optimizer=optimizer,
144            backend=backend,
145            seed=seed,
146        )
147
148        self.family = family
149
150    def logit_loss(self, Y, row_index, XB):
151        self.n_classes = Y.shape[1]  # len(np.unique(y))
152        # Y = mo.one_hot_encode2(y, self.n_classes)
153        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
154
155        # max_double = 709.0 # only if softmax
156        # XB[XB > max_double] = max_double
157        XB[XB > 709.0] = 709.0
158
159        if row_index is None:
160            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
161
162        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
163
164    def expit_erf_loss(self, Y, row_index, XB):
165        # self.n_classes = len(np.unique(y))
166        # Y = mo.one_hot_encode2(y, self.n_classes)
167        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
168        self.n_classes = Y.shape[1]
169
170        if row_index is None:
171            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))
172
173        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))
174
175    def loss_func(
176        self,
177        beta,
178        group_index,
179        X,
180        Y,
181        y,
182        row_index=None,
183        type_loss="logit",
184        **kwargs
185    ):
186        res = {
187            "logit": self.logit_loss,
188            "expit": self.expit_erf_loss,
189            "erf": self.expit_erf_loss,
190        }
191
192        if row_index is None:
193            row_index = range(len(y))
194            XB = self.compute_XB(
195                X,
196                beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
197            )
198
199            return res[type_loss](Y, row_index, XB) + self.compute_penalty(
200                group_index=group_index, beta=beta
201            )
202
203        XB = self.compute_XB(
204            X,
205            beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
206            row_index=row_index,
207        )
208
209        return res[type_loss](Y, row_index, XB) + self.compute_penalty(
210            group_index=group_index, beta=beta
211        )
212
213    def fit(self, X, y, **kwargs):
214        """Fit GLM model to training data (X, y).
215
216        Args:
217
218            X: {array-like}, shape = [n_samples, n_features]
219                Training vectors, where n_samples is the number
220                of samples and n_features is the number of features.
221
222            y: array-like, shape = [n_samples]
223                Target values.
224
225            **kwargs: additional parameters to be passed to
226                    self.cook_training_set or self.obj.fit
227
228        Returns:
229
230            self: object
231
232        """
233
234        assert mx.is_factor(
235            y
236        ), "y must contain only integers"  # change is_factor and subsampling everywhere
237
238        self.classes_ = np.unique(y)  # for compatibility with sklearn
239        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
240
241        self.beta_ = None
242
243        n, p = X.shape
244
245        self.group_index = n * X.shape[1]
246
247        self.n_classes = len(np.unique(y))
248
249        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
250
251        # Y = mo.one_hot_encode2(output_y, self.n_classes)
252        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
253
254        # initialization
255        if self.backend == "cpu":
256            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
257        else:
258            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
259
260        # optimization
261        # fit(self, loss_func, response, x0, **kwargs):
262        # loss_func(self, beta, group_index, X, y,
263        #          row_index=None, type_loss="gaussian",
264        #          **kwargs)
265        self.optimizer.fit(
266            self.loss_func,
267            response=y,
268            x0=beta_.flatten(order="F"),
269            group_index=self.group_index,
270            X=scaled_Z,
271            Y=Y,
272            y=y,
273            type_loss=self.family,
274        )
275
276        self.beta_ = self.optimizer.results[0]
277        self.classes_ = np.unique(y)
278
279        return self
280
281    def predict(self, X, **kwargs):
282        """Predict test data X.
283
284        Args:
285
286            X: {array-like}, shape = [n_samples, n_features]
287                Training vectors, where n_samples is the number
288                of samples and n_features is the number of features.
289
290            **kwargs: additional parameters to be passed to
291                    self.cook_test_set
292
293        Returns:
294
295            model predictions: {array-like}
296
297        """
298
299        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
300
301    def predict_proba(self, X, **kwargs):
302        """Predict probabilities for test data X.
303
304        Args:
305
306            X: {array-like}, shape = [n_samples, n_features]
307                Training vectors, where n_samples is the number
308                of samples and n_features is the number of features.
309
310            **kwargs: additional parameters to be passed to
311                    self.cook_test_set
312
313        Returns:
314
315            probability estimates for test data: {array-like}
316
317        """
318        if len(X.shape) == 1:
319            n_features = X.shape[0]
320            new_X = mo.rbind(
321                X.reshape(1, n_features),
322                np.ones(n_features).reshape(1, n_features),
323            )
324
325            Z = self.cook_test_set(new_X, **kwargs)
326
327        else:
328            Z = self.cook_test_set(X, **kwargs)
329
330        ZB = mo.safe_sparse_dot(
331            Z,
332            self.beta_.reshape(
333                self.n_classes,
334                X.shape[1] + self.n_hidden_features + self.n_clusters,
335            ).T,
336        )
337
338        if self.family == "logit":
339            exp_ZB = np.exp(ZB)
340
341            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
342
343        if self.family == "expit":
344            exp_ZB = expit(ZB)
345
346            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
347
348        if self.family == "erf":
349            exp_ZB = 0.5 * (1 + erf(ZB))
350
351            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
352
353    def score(self, X, y, scoring=None):
354        """Scoring function for classification.
355
356        Args:
357
358            X: {array-like}, shape = [n_samples, n_features]
359                Training vectors, where n_samples is the number
360                of samples and n_features is the number of features.
361
362            y: array-like, shape = [n_samples]
363                Target values.
364
365            scoring: str
366                scoring method (default is accuracy)
367
368        Returns:
369
370            score: float
371        """
372
373        if scoring is None:
374            scoring = "accuracy"
375
376        if scoring == "accuracy":
377            return skm2.accuracy_score(y, self.predict(X))
378
379        if scoring == "f1":
380            return skm2.f1_score(y, self.predict(X))
381
382        if scoring == "precision":
383            return skm2.precision_score(y, self.predict(X))
384
385        if scoring == "recall":
386            return skm2.recall_score(y, self.predict(X))
387
388        if scoring == "roc_auc":
389            return skm2.roc_auc_score(y, self.predict(X))
390
391        if scoring == "log_loss":
392            return skm2.log_loss(y, self.predict_proba(X))
393
394        if scoring == "balanced_accuracy":
395            return skm2.balanced_accuracy_score(y, self.predict(X))
396
397        if scoring == "average_precision":
398            return skm2.average_precision_score(y, self.predict(X))
399
400        if scoring == "neg_brier_score":
401            return -skm2.brier_score_loss(y, self.predict_proba(X))
402
403        if scoring == "neg_log_loss":
404            return -skm2.log_loss(y, self.predict_proba(X))
405
406    @property
407    def _estimator_type(self):
408        return "classifier"

Generalized 'linear' models using quasi-randomized networks (classification)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls compromise between l1 and l2 norm of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class Optimizer

backend: str.
    "cpu" or "gpu" or "tpu".

seed: int
    reproducibility seed for nodes_sim=='uniform'

Attributes:

beta_: vector
    regression coefficients

Examples:

See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py

def fit(self, X, y, **kwargs):
213    def fit(self, X, y, **kwargs):
214        """Fit GLM model to training data (X, y).
215
216        Args:
217
218            X: {array-like}, shape = [n_samples, n_features]
219                Training vectors, where n_samples is the number
220                of samples and n_features is the number of features.
221
222            y: array-like, shape = [n_samples]
223                Target values.
224
225            **kwargs: additional parameters to be passed to
226                    self.cook_training_set or self.obj.fit
227
228        Returns:
229
230            self: object
231
232        """
233
234        assert mx.is_factor(
235            y
236        ), "y must contain only integers"  # change is_factor and subsampling everywhere
237
238        self.classes_ = np.unique(y)  # for compatibility with sklearn
239        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
240
241        self.beta_ = None
242
243        n, p = X.shape
244
245        self.group_index = n * X.shape[1]
246
247        self.n_classes = len(np.unique(y))
248
249        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
250
251        # Y = mo.one_hot_encode2(output_y, self.n_classes)
252        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)
253
254        # initialization
255        if self.backend == "cpu":
256            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
257        else:
258            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
259
260        # optimization
261        # fit(self, loss_func, response, x0, **kwargs):
262        # loss_func(self, beta, group_index, X, y,
263        #          row_index=None, type_loss="gaussian",
264        #          **kwargs)
265        self.optimizer.fit(
266            self.loss_func,
267            response=y,
268            x0=beta_.flatten(order="F"),
269            group_index=self.group_index,
270            X=scaled_Z,
271            Y=Y,
272            y=y,
273            type_loss=self.family,
274        )
275
276        self.beta_ = self.optimizer.results[0]
277        self.classes_ = np.unique(y)
278
279        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
281    def predict(self, X, **kwargs):
282        """Predict test data X.
283
284        Args:
285
286            X: {array-like}, shape = [n_samples, n_features]
287                Training vectors, where n_samples is the number
288                of samples and n_features is the number of features.
289
290            **kwargs: additional parameters to be passed to
291                    self.cook_test_set
292
293        Returns:
294
295            model predictions: {array-like}
296
297        """
298
299        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
301    def predict_proba(self, X, **kwargs):
302        """Predict probabilities for test data X.
303
304        Args:
305
306            X: {array-like}, shape = [n_samples, n_features]
307                Training vectors, where n_samples is the number
308                of samples and n_features is the number of features.
309
310            **kwargs: additional parameters to be passed to
311                    self.cook_test_set
312
313        Returns:
314
315            probability estimates for test data: {array-like}
316
317        """
318        if len(X.shape) == 1:
319            n_features = X.shape[0]
320            new_X = mo.rbind(
321                X.reshape(1, n_features),
322                np.ones(n_features).reshape(1, n_features),
323            )
324
325            Z = self.cook_test_set(new_X, **kwargs)
326
327        else:
328            Z = self.cook_test_set(X, **kwargs)
329
330        ZB = mo.safe_sparse_dot(
331            Z,
332            self.beta_.reshape(
333                self.n_classes,
334                X.shape[1] + self.n_hidden_features + self.n_clusters,
335            ).T,
336        )
337
338        if self.family == "logit":
339            exp_ZB = np.exp(ZB)
340
341            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
342
343        if self.family == "expit":
344            exp_ZB = expit(ZB)
345
346            return exp_ZB / exp_ZB.sum(axis=1)[:, None]
347
348        if self.family == "erf":
349            exp_ZB = 0.5 * (1 + erf(ZB))
350
351            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
353    def score(self, X, y, scoring=None):
354        """Scoring function for classification.
355
356        Args:
357
358            X: {array-like}, shape = [n_samples, n_features]
359                Training vectors, where n_samples is the number
360                of samples and n_features is the number of features.
361
362            y: array-like, shape = [n_samples]
363                Target values.
364
365            scoring: str
366                scoring method (default is accuracy)
367
368        Returns:
369
370            score: float
371        """
372
373        if scoring is None:
374            scoring = "accuracy"
375
376        if scoring == "accuracy":
377            return skm2.accuracy_score(y, self.predict(X))
378
379        if scoring == "f1":
380            return skm2.f1_score(y, self.predict(X))
381
382        if scoring == "precision":
383            return skm2.precision_score(y, self.predict(X))
384
385        if scoring == "recall":
386            return skm2.recall_score(y, self.predict(X))
387
388        if scoring == "roc_auc":
389            return skm2.roc_auc_score(y, self.predict(X))
390
391        if scoring == "log_loss":
392            return skm2.log_loss(y, self.predict_proba(X))
393
394        if scoring == "balanced_accuracy":
395            return skm2.balanced_accuracy_score(y, self.predict(X))
396
397        if scoring == "average_precision":
398            return skm2.average_precision_score(y, self.predict(X))
399
400        if scoring == "neg_brier_score":
401            return -skm2.brier_score_loss(y, self.predict_proba(X))
402
403        if scoring == "neg_log_loss":
404            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class GLMRegressor(nnetsauce.glm.glm.GLM, sklearn.base.RegressorMixin):
 22class GLMRegressor(GLM, RegressorMixin):
 23    """Generalized 'linear' models using quasi-randomized networks (regression)
 24
 25    Attributes:
 26
 27        n_hidden_features: int
 28            number of nodes in the hidden layer
 29
 30        lambda1: float
 31            regularization parameter for GLM coefficients on original features
 32
 33        alpha1: float
 34            controls compromize between l1 and l2 norm of GLM coefficients on original features
 35
 36        lambda2: float
 37            regularization parameter for GLM coefficients on nonlinear features
 38
 39        alpha2: float
 40            controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features
 41
 42        family: str
 43            "gaussian", "laplace", "poisson", or "quantile" (for now)
 44
 45        level: int, default=50
 46            The level of the quantiles to compute for family = "quantile".
 47            Default is the median.
 48
 49        activation_name: str
 50            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 51
 52        a: float
 53            hyperparameter for 'prelu' or 'elu' activation function
 54
 55        nodes_sim: str
 56            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 57            'uniform'
 58
 59        bias: boolean
 60            indicates if the hidden layer contains a bias term (True) or not
 61            (False)
 62
 63        dropout: float
 64            regularization parameter; (random) percentage of nodes dropped out
 65            of the training
 66
 67        direct_link: boolean
 68            indicates if the original predictors are included (True) in model's
 69            fitting or not (False)
 70
 71        n_clusters: int
 72            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 73                no clustering)
 74
 75        cluster_encode: bool
 76            defines how the variable containing clusters is treated (default is one-hot)
 77            if `False`, then labels are used, without one-hot encoding
 78
 79        type_clust: str
 80            type of clustering method: currently k-means ('kmeans') or Gaussian
 81            Mixture Model ('gmm')
 82
 83        type_scaling: a tuple of 3 strings
 84            scaling methods for inputs, hidden layer, and clustering respectively
 85            (and when relevant).
 86            Currently available: standardization ('std') or MinMax scaling ('minmax')
 87
 88        optimizer: object
 89            optimizer, from class nnetsauce.utils.Optimizer
 90
 91        backend: str.
 92            "cpu" or "gpu" or "tpu".
 93
 94        seed: int
 95            reproducibility seed for nodes_sim=='uniform'
 96
 97        backend: str
 98            "cpu", "gpu", "tpu"
 99
100    Attributes:
101
102        beta_: vector
103            regression coefficients
104
105    Examples:
106
107        See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
108
109    """
110
111    # construct the object -----
112
113    def __init__(
114        self,
115        n_hidden_features=5,
116        lambda1=0.01,
117        alpha1=0.5,
118        lambda2=0.01,
119        alpha2=0.5,
120        family="gaussian",
121        level=50,
122        activation_name="relu",
123        a=0.01,
124        nodes_sim="sobol",
125        bias=True,
126        dropout=0,
127        direct_link=True,
128        n_clusters=2,
129        cluster_encode=True,
130        type_clust="kmeans",
131        type_scaling=("std", "std", "std"),
132        optimizer=Optimizer(),
133        backend="cpu",
134        seed=123,
135    ):
136        super().__init__(
137            n_hidden_features=n_hidden_features,
138            lambda1=lambda1,
139            alpha1=alpha1,
140            lambda2=lambda2,
141            alpha2=alpha2,
142            activation_name=activation_name,
143            a=a,
144            nodes_sim=nodes_sim,
145            bias=bias,
146            dropout=dropout,
147            direct_link=direct_link,
148            n_clusters=n_clusters,
149            cluster_encode=cluster_encode,
150            type_clust=type_clust,
151            type_scaling=type_scaling,
152            optimizer=optimizer,
153            backend=backend,
154            seed=seed,
155        )
156
157        self.family = family
158        self.level = level
159        self.q = self.level / 100
160
161    def gaussian_loss(self, y, row_index, XB):
162        return 0.5 * np.mean(np.square(y[row_index] - XB))
163
164    def laplace_loss(self, y, row_index, XB):
165        return 0.5 * np.mean(np.abs(y[row_index] - XB))
166
167    def poisson_loss(self, y, row_index, XB):
168        return -np.mean(y[row_index] * XB - np.exp(XB))
169
170    def pinball_loss(self, y, row_index, XB, tau=0.5):
171        y = np.array(y[row_index])
172        y_pred = np.array(XB)
173        return mean_pinball_loss(y, y_pred, alpha=tau)
174        # return np.mean(np.maximum(tau * residuals, (tau - 1) * residuals))
175
176    def loss_func(
177        self,
178        beta,
179        group_index,
180        X,
181        y,
182        row_index=None,
183        type_loss="gaussian",
184        **kwargs
185    ):
186        res = {
187            "gaussian": self.gaussian_loss,
188            "laplace": self.laplace_loss,
189            "poisson": self.poisson_loss,
190            "quantile": self.pinball_loss,
191        }
192
193        if type_loss != "quantile":
194            if row_index is None:
195                row_index = range(len(y))
196                XB = self.compute_XB(X, beta=beta)
197
198                return res[type_loss](y, row_index, XB) + self.compute_penalty(
199                    group_index=group_index, beta=beta
200                )
201
202            XB = self.compute_XB(X, beta=beta, row_index=row_index)
203
204            return res[type_loss](y, row_index, XB) + self.compute_penalty(
205                group_index=group_index, beta=beta
206            )
207
208        else:  # quantile
209            assert (
210                self.q > 0 and self.q < 1
211            ), "'tau' must be comprised 0 < tau < 1"
212
213            if row_index is None:
214                row_index = range(len(y))
215                XB = self.compute_XB(X, beta=beta)
216                return res[type_loss](y, row_index, XB, self.q)
217
218            XB = self.compute_XB(X, beta=beta, row_index=row_index)
219            return res[type_loss](y, row_index, XB, self.q)
220
221    def fit(self, X, y, **kwargs):
222        """Fit GLM model to training data (X, y).
223
224        Args:
225
226            X: {array-like}, shape = [n_samples, n_features]
227                Training vectors, where n_samples is the number
228                of samples and n_features is the number of features.
229
230            y: array-like, shape = [n_samples]
231                Target values.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_training_set or self.obj.fit
235
236        Returns:
237
238            self: object
239
240        """
241        self.beta_ = None
242        self.n_iter = 0
243
244        _, self.group_index = X.shape
245
246        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
247        # initialization
248        if self.backend == "cpu":
249            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
250        else:
251            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
252        # optimization
253        # fit(self, loss_func, response, x0, **kwargs):
254        # loss_func(self, beta, group_index, X, y,
255        #          row_index=None, type_loss="gaussian",
256        #          **kwargs)
257        self.optimizer.fit(
258            self.loss_func,
259            response=centered_y,
260            x0=beta_,
261            group_index=self.group_index,
262            X=scaled_Z,
263            y=centered_y,
264            type_loss=self.family,
265            **kwargs
266        )
267
268        self.beta_ = self.optimizer.results[0]
269
270        return self
271
272    def predict(self, X, **kwargs):
273        """Predict test data X.
274
275        Args:
276
277            X: {array-like}, shape = [n_samples, n_features]
278                Training vectors, where n_samples is the number
279                of samples and n_features is the number of features.
280
281            **kwargs: additional parameters to be passed to
282                    self.cook_test_set
283
284        Returns:
285
286            model predictions: {array-like}
287
288        """
289
290        if len(X.shape) == 1:
291            n_features = X.shape[0]
292            new_X = mo.rbind(
293                X.reshape(1, n_features),
294                np.ones(n_features).reshape(1, n_features),
295            )
296
297            return (
298                self.y_mean_
299                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
300            )[0]
301
302        return self.y_mean_ + np.dot(
303            self.cook_test_set(X, **kwargs), self.beta_
304        )
305
306    def score(self, X, y, scoring=None):
307        """Compute the score of the model.
308
309        Parameters:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method
320
321        Returns:
322
323            score: float
324
325        """
326
327        if scoring is None:
328            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
329
330        return skm2.get_scorer(scoring)(self, X, y)

Generalized 'linear' models using quasi-randomized networks (regression)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

lambda1: float
    regularization parameter for GLM coefficients on original features

alpha1: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on original features

lambda2: float
    regularization parameter for GLM coefficients on nonlinear features

alpha2: float
    controls the compromise between the l1 and l2 norms of GLM coefficients on nonlinear features

family: str
    "gaussian", "laplace", "poisson", or "quantile" (for now)

level: int, default=50
    The level of the quantiles to compute for family = "quantile".
    Default is the median.

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

optimizer: object
    optimizer, from class Optimizer

backend: str.
    "cpu" or "gpu" or "tpu".

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu", "gpu", "tpu"

Attributes:

beta_: vector
    regression coefficients

Examples:

See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
def fit(self, X, y, **kwargs):
221    def fit(self, X, y, **kwargs):
222        """Fit GLM model to training data (X, y).
223
224        Args:
225
226            X: {array-like}, shape = [n_samples, n_features]
227                Training vectors, where n_samples is the number
228                of samples and n_features is the number of features.
229
230            y: array-like, shape = [n_samples]
231                Target values.
232
233            **kwargs: additional parameters to be passed to
234                    self.cook_training_set or self.obj.fit
235
236        Returns:
237
238            self: object
239
240        """
241        self.beta_ = None
242        self.n_iter = 0
243
244        _, self.group_index = X.shape
245
246        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
247        # initialization
248        if self.backend == "cpu":
249            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
250        else:
251            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
252        # optimization
253        # fit(self, loss_func, response, x0, **kwargs):
254        # loss_func(self, beta, group_index, X, y,
255        #          row_index=None, type_loss="gaussian",
256        #          **kwargs)
257        self.optimizer.fit(
258            self.loss_func,
259            response=centered_y,
260            x0=beta_,
261            group_index=self.group_index,
262            X=scaled_Z,
263            y=centered_y,
264            type_loss=self.family,
265            **kwargs
266        )
267
268        self.beta_ = self.optimizer.results[0]
269
270        return self

Fit GLM model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set and self.optimizer.fit

Returns:

self: object
def predict(self, X, **kwargs):
272    def predict(self, X, **kwargs):
273        """Predict test data X.
274
275        Args:
276
277            X: {array-like}, shape = [n_samples, n_features]
278                Training vectors, where n_samples is the number
279                of samples and n_features is the number of features.
280
281            **kwargs: additional parameters to be passed to
282                    self.cook_test_set
283
284        Returns:
285
286            model predictions: {array-like}
287
288        """
289
290        if len(X.shape) == 1:
291            n_features = X.shape[0]
292            new_X = mo.rbind(
293                X.reshape(1, n_features),
294                np.ones(n_features).reshape(1, n_features),
295            )
296
297            return (
298                self.y_mean_
299                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
300            )[0]
301
302        return self.y_mean_ + np.dot(
303            self.cook_test_set(X, **kwargs), self.beta_
304        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def score(self, X, y, scoring=None):
306    def score(self, X, y, scoring=None):
307        """Compute the score of the model.
308
309        Parameters:
310
311            X: {array-like}, shape = [n_samples, n_features]
312                Training vectors, where n_samples is the number
313                of samples and n_features is the number of features.
314
315            y: array-like, shape = [n_samples]
316                Target values.
317
318            scoring: str
319                scoring method
320
321        Returns:
322
323            score: float
324
325        """
326
327        if scoring is None:
328            return np.sqrt(np.mean((self.predict(X) - y) ** 2))
329
330        return skm2.get_scorer(scoring)(self, X, y)

Compute the score of the model.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method

Returns:

score: float
class KernelRidge(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 18class KernelRidge(BaseEstimator, RegressorMixin):
 19    """
 20    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
 21
 22    Parameters:
 23    - alpha: float
 24        Regularization parameter.
 25    - kernel: str
 26        Kernel type ("linear", "rbf", or "matern").
 27    - gamma: float
 28        Kernel coefficient for "rbf". Ignored for other kernels.
 29    - nu: float
 30        Smoothness parameter for the Matérn kernel. Default is 1.5.
 31    - length_scale: float
 32        Length scale parameter for the Matérn kernel. Default is 1.0.
 33    - backend: str
 34        "cpu" or "gpu" (uses JAX if "gpu").
 35    """
 36
 37    def __init__(
 38        self,
 39        alpha=1.0,
 40        kernel="rbf",
 41        gamma=None,
 42        nu=1.5,
 43        length_scale=1.0,
 44        backend="cpu",
 45    ):
 46        if not JAX_AVAILABLE and backend != "cpu":
 47            raise RuntimeError(
 48                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
 49            )
 50        self.alpha = alpha
 51        self.alpha_ = alpha
 52        self.kernel = kernel
 53        self.gamma = gamma
 54        self.nu = nu
 55        self.length_scale = length_scale
 56        self.backend = backend
 57        self.scaler = StandardScaler()
 58
 59        if backend == "gpu" and not JAX_AVAILABLE:
 60            raise ImportError(
 61                "JAX is not installed. Please install JAX to use the GPU backend."
 62            )
 63
 64    def _linear_kernel(self, X, Y):
 65        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)
 66
 67    def _rbf_kernel(self, X, Y):
 68        if self.gamma is None:
 69            self.gamma = 1.0 / X.shape[1]
 70        if self.backend == "gpu":
 71            sq_dists = (
 72                jnp.sum(X**2, axis=1)[:, None]
 73                + jnp.sum(Y**2, axis=1)
 74                - 2 * jnp.dot(X, Y.T)
 75            )
 76            return jnp.exp(-self.gamma * sq_dists)
 77        else:
 78            sq_dists = (
 79                np.sum(X**2, axis=1)[:, None]
 80                + np.sum(Y**2, axis=1)
 81                - 2 * np.dot(X, Y.T)
 82            )
 83            return np.exp(-self.gamma * sq_dists)
 84
 85    def _matern_kernel(self, X, Y):
 86        """
 87        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.
 88
 89        Parameters:
 90        - X: array-like, shape (n_samples_X, n_features)
 91        - Y: array-like, shape (n_samples_Y, n_features)
 92
 93        Returns:
 94        - Kernel matrix, shape (n_samples_X, n_samples_Y)
 95        """
 96        if self.backend == "gpu":
 97            # Compute pairwise distances
 98            dists = jnp.sqrt(
 99                jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
100            )
101            scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale
102
103            # Matérn kernel formula
104            coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu))
105            matern_kernel = (
106                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
107            )
108            matern_kernel = jnp.where(
109                dists == 0, 1.0, matern_kernel
110            )  # Handle the case where distance is 0
111            return matern_kernel
112        else:
113            # Use NumPy for CPU
114            from scipy.special import (
115                gammaln,
116                kv,
117            )  # Ensure scipy.special is used for CPU
118
119            dists = np.sqrt(
120                np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
121            )
122            scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale
123
124            # Matérn kernel formula
125            coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu))
126            matern_kernel = (
127                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
128            )
129            matern_kernel = np.where(
130                dists == 0, 1.0, matern_kernel
131            )  # Handle the case where distance is 0
132            return matern_kernel
133
134    def _get_kernel(self, X, Y):
135        if self.kernel == "linear":
136            return self._linear_kernel(X, Y)
137        elif self.kernel == "rbf":
138            return self._rbf_kernel(X, Y)
139        elif self.kernel == "matern":
140            return self._matern_kernel(X, Y)
141        else:
142            raise ValueError(f"Unsupported kernel: {self.kernel}")
143
144    def fit(self, X, y):
145        """
146        Fit the Kernel Ridge Regression model.
147
148        Parameters:
149        - X: array-like, shape (n_samples, n_features)
150            Training data.
151        - y: array-like, shape (n_samples,)
152            Target values.
153        """
154        # Standardize the inputs
155        X = self.scaler.fit_transform(X)
156        self.X_fit_ = X
157
158        # Center the response
159        self.y_mean_ = np.mean(y)
160        y_centered = y - self.y_mean_
161
162        n_samples = X.shape[0]
163
164        # Compute the kernel matrix
165        K = self._get_kernel(X, X)
166        self.K_ = K
167        self.y_fit_ = y_centered
168
169        if isinstance(self.alpha, (list, np.ndarray)):
170            # If alpha is a list or array, compute LOOE for each alpha
171            self.alphas_ = self.alpha  # Store the list of alphas
172            self.dual_coefs_ = []  # Store dual coefficients for each alpha
173            self.looe_ = []  # Store LOOE for each alpha
174
175            for alpha in self.alpha:
176                G = K + alpha * np.eye(n_samples)
177                G_inv = np.linalg.inv(G)
178                diag_G_inv = np.diag(G_inv)
179                dual_coef = np.linalg.solve(G, y_centered)
180                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
181                self.dual_coefs_.append(dual_coef)
182                self.looe_.append(looe)
183
184            # Select the best alpha based on the smallest LOOE
185            best_index = np.argmin(self.looe_)
186            self.alpha_ = self.alpha[best_index]
187            self.dual_coef_ = self.dual_coefs_[best_index]
188        else:
189            # If alpha is a single value, proceed as usual
190            if self.backend == "gpu":
191                self.dual_coef_ = jnp.linalg.solve(
192                    K + self.alpha * jnp.eye(n_samples), y_centered
193                )
194            else:
195                self.dual_coef_ = np.linalg.solve(
196                    K + self.alpha * np.eye(n_samples), y_centered
197                )
198
199        return self
200
201    def predict(self, X, probs=False):
202        """
203        Predict using the Kernel Ridge Regression model.
204
205        Parameters:
206        - X: array-like, shape (n_samples, n_features)
207            Test data.
208
209        Returns:
210        - Predicted values, shape (n_samples,).
211        """
212        # Standardize the inputs
213        X = self.scaler.transform(X)
214        K = self._get_kernel(X, self.X_fit_)
215        if self.backend == "gpu":
216            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
217            if probs:
218                # Compute similarity to self.X_fit_
219                similarities = jnp.dot(
220                    preds, self.X_fit_.T
221                )  # Shape: (n_samples, n_fit_)
222                # Apply softmax to get probabilities
223                return jaxsoftmax(similarities, axis=1)
224            return preds
225        else:
226            preds = np.dot(K, self.dual_coef_) + self.y_mean_
227            if probs:
228                # Compute similarity to self.X_fit_
229                similarities = np.dot(
230                    preds, self.X_fit_.T
231                )  # Shape: (n_samples, n_fit_)
232                # Apply softmax to get probabilities
233                return softmax(similarities, axis=1)
234            return preds
235
236    def partial_fit(self, X, y):
237        """
238        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.
239
240        Parameters:
241        - X: array-like, shape (n_samples, n_features)
242            New training data.
243        - y: array-like, shape (n_samples,)
244            New target values.
245
246        Returns:
247        - self: object
248            The updated model.
249        """
250        # Standardize the inputs
251        X = (
252            self.scaler.fit_transform(X)
253            if not hasattr(self, "X_fit_")
254            else self.scaler.transform(X)
255        )
256
257        if not hasattr(self, "X_fit_"):
258            # Initialize with the first batch of data
259            self.X_fit_ = X
260
261            # Center the response
262            self.y_mean_ = np.mean(y)
263            y_centered = y - self.y_mean_
264            self.y_fit_ = y_centered
265
266            n_samples = X.shape[0]
267
268            # Compute the kernel matrix for the initial data
269            self.K_ = self._get_kernel(X, X)
270
271            # Initialize dual coefficients for each alpha
272            if isinstance(self.alpha, (list, np.ndarray)):
273                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
274            else:
275                self.dual_coef_ = np.zeros(n_samples)
276        else:
277            # Incrementally update with new data
278            y_centered = y - self.y_mean_  # Center the new batch of responses
279            for x_new, y_new in zip(X, y_centered):
280                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
281                k_new = self._get_kernel(self.X_fit_, x_new).flatten()
282
283                # Compute the kernel value for the new data point
284                k_self = self._get_kernel(x_new, x_new).item()
285
286                if isinstance(self.alpha, (list, np.ndarray)):
287                    # Update dual coefficients for each alpha
288                    for idx, alpha in enumerate(self.alpha):
289                        gamma_new = 1 / (k_self + alpha)
290                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
291                        self.dual_coefs_[idx] = np.append(
292                            self.dual_coefs_[idx], gamma_new * residual
293                        )
294                else:
295                    # Update dual coefficients for a single alpha
296                    gamma_new = 1 / (k_self + self.alpha)
297                    residual = y_new - np.dot(self.dual_coef_, k_new)
298                    self.dual_coef_ = np.append(
299                        self.dual_coef_, gamma_new * residual
300                    )
301
302                # Update the kernel matrix
303                self.K_ = np.block(
304                    [
305                        [self.K_, k_new[:, None]],
306                        [k_new[None, :], np.array([[k_self]])],
307                    ]
308                )
309
310                # Update the stored data
311                self.X_fit_ = np.vstack([self.X_fit_, x_new])
312                self.y_fit_ = np.append(self.y_fit_, y_new)
313
314        # Select the best alpha based on LOOE after the batch
315        if isinstance(self.alpha, (list, np.ndarray)):
316            self.looe_ = []
317            for idx, alpha in enumerate(self.alpha):
318                G = self.K_ + alpha * np.eye(self.K_.shape[0])
319                G_inv = np.linalg.inv(G)
320                diag_G_inv = np.diag(G_inv)
321                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
322                self.looe_.append(looe)
323
324            # Select the best alpha
325            best_index = np.argmin(self.looe_)
326            self.alpha_ = self.alpha[best_index]
327            self.dual_coef_ = self.dual_coefs_[best_index]
328
329        return self

Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

Parameters:

  • alpha: float Regularization parameter.
  • kernel: str Kernel type ("linear", "rbf", or "matern").
  • gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
  • nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
  • length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
  • backend: str "cpu" or "gpu" (uses JAX if "gpu").
def fit(self, X, y):
144    def fit(self, X, y):
145        """
146        Fit the Kernel Ridge Regression model.
147
148        Parameters:
149        - X: array-like, shape (n_samples, n_features)
150            Training data.
151        - y: array-like, shape (n_samples,)
152            Target values.
153        """
154        # Standardize the inputs
155        X = self.scaler.fit_transform(X)
156        self.X_fit_ = X
157
158        # Center the response
159        self.y_mean_ = np.mean(y)
160        y_centered = y - self.y_mean_
161
162        n_samples = X.shape[0]
163
164        # Compute the kernel matrix
165        K = self._get_kernel(X, X)
166        self.K_ = K
167        self.y_fit_ = y_centered
168
169        if isinstance(self.alpha, (list, np.ndarray)):
170            # If alpha is a list or array, compute LOOE for each alpha
171            self.alphas_ = self.alpha  # Store the list of alphas
172            self.dual_coefs_ = []  # Store dual coefficients for each alpha
173            self.looe_ = []  # Store LOOE for each alpha
174
175            for alpha in self.alpha:
176                G = K + alpha * np.eye(n_samples)
177                G_inv = np.linalg.inv(G)
178                diag_G_inv = np.diag(G_inv)
179                dual_coef = np.linalg.solve(G, y_centered)
180                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
181                self.dual_coefs_.append(dual_coef)
182                self.looe_.append(looe)
183
184            # Select the best alpha based on the smallest LOOE
185            best_index = np.argmin(self.looe_)
186            self.alpha_ = self.alpha[best_index]
187            self.dual_coef_ = self.dual_coefs_[best_index]
188        else:
189            # If alpha is a single value, proceed as usual
190            if self.backend == "gpu":
191                self.dual_coef_ = jnp.linalg.solve(
192                    K + self.alpha * jnp.eye(n_samples), y_centered
193                )
194            else:
195                self.dual_coef_ = np.linalg.solve(
196                    K + self.alpha * np.eye(n_samples), y_centered
197                )
198
199        return self

Fit the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Training data.
  • y: array-like, shape (n_samples,) Target values.
def predict(self, X, probs=False):
201    def predict(self, X, probs=False):
202        """
203        Predict using the Kernel Ridge Regression model.
204
205        Parameters:
206        - X: array-like, shape (n_samples, n_features)
207            Test data.
208
209        Returns:
210        - Predicted values, shape (n_samples,).
211        """
212        # Standardize the inputs
213        X = self.scaler.transform(X)
214        K = self._get_kernel(X, self.X_fit_)
215        if self.backend == "gpu":
216            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
217            if probs:
218                # Compute similarity to self.X_fit_
219                similarities = jnp.dot(
220                    preds, self.X_fit_.T
221                )  # Shape: (n_samples, n_fit_)
222                # Apply softmax to get probabilities
223                return jaxsoftmax(similarities, axis=1)
224            return preds
225        else:
226            preds = np.dot(K, self.dual_coef_) + self.y_mean_
227            if probs:
228                # Compute similarity to self.X_fit_
229                similarities = np.dot(
230                    preds, self.X_fit_.T
231                )  # Shape: (n_samples, n_fit_)
232                # Apply softmax to get probabilities
233                return softmax(similarities, axis=1)
234            return preds

Predict using the Kernel Ridge Regression model.

Parameters:

  • X: array-like, shape (n_samples, n_features) Test data.

Returns:

  • Predicted values, shape (n_samples,).
class LazyClassifier(nnetsauce.LazyDeepClassifier):
class LazyClassifier(LazyDeepClassifier):
    """
        Fit -- almost -- all the classification algorithms wrapped in
        nnetsauce's CustomClassifier and return their scores (no layers).

        This is `LazyDeepClassifier` with `n_layers` fixed to 1.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are also evaluated with it.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned as a dataframe.

        sort_by: string, optional (default='Accuracy')
            Metric used to sort the models. Available options are
            'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a
            custom metric identified by its name and provided by
            custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator names, or just 'all'.

        preprocess: bool
            Preprocessing is done when set to True.

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Dictionary mapping each model name to its fitted pipeline.

        best_model_: object
            The best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        dataset = datasets.load_iris()
        X = dataset.data
        y = dataset.target
        X, y = shuffle(X, y, random_state=123)
        X = X.astype(np.float32)
        y = y.astype(np.float32)
        X_train, X_test = X[:100], X[100:]
        y_train, y_test = y[:100], y[100:]

        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Options that drive the benchmark itself
        benchmark_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )
        # Options forwarded unchanged for the CustomClassifier wrappers
        wrapper_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # The depth is pinned to one layer; everything else is passed through.
        super().__init__(n_layers=1, **benchmark_options, **wrapper_options)

Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
class LazyRegressor(nnetsauce.LazyDeepRegressor):
class LazyRegressor(LazyDeepRegressor):
    """
        Fit -- almost -- all the regression algorithms wrapped in
        nnetsauce's CustomRegressor and return their scores.

        This is `LazyDeepRegressor` with `n_layers` fixed to 1.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are also evaluated with it.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned as a dataframe.

        sort_by: string, optional (default='RMSE')
            Metric used to sort the models. Available options are
            'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and
            'Custom Metric', or a custom metric identified by its name and
            provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator names, or just 'all'.

        preprocess: bool
            Preprocessing is done when set to True.

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Dictionary mapping each model name to its fitted pipeline.

        best_model_: object
            The best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                            custom_metric=None)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Options that drive the benchmark itself
        benchmark_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )
        # Options forwarded unchanged for the CustomRegressor wrappers
        wrapper_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # The depth is pinned to one layer; everything else is passed through.
        super().__init__(n_layers=1, **benchmark_options, **wrapper_options)

Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
    or a custom metric identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                    custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
class LazyDeepClassifier(nnetsauce.custom.custom.Custom, sklearn.base.ClassifierMixin):
 94class LazyDeepClassifier(Custom, ClassifierMixin):
 95    """
 96
 97    Fitting -- almost -- all the classification algorithms with layers of
 98    nnetsauce's CustomClassifier and returning their scores.
 99
100    Parameters:
101
102        verbose: int, optional (default=0)
103            Any positive number for verbosity.
104
105        ignore_warnings: bool, optional (default=True)
106            When set to True, the warning related to algorigms that are not
107            able to run are ignored.
108
109        custom_metric: function, optional (default=None)
110            When function is provided, models are evaluated based on the custom
111              evaluation metric provided.
112
113        predictions: bool, optional (default=False)
114            When set to True, the predictions of all the models models are
115            returned as data frame.
116
117        sort_by: string, optional (default='Accuracy')
118            Sort models by a metric. Available options are 'Accuracy',
119            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
120            identified by its name and provided by custom_metric.
121
122        random_state: int, optional (default=42)
123            Reproducibiility seed.
124
125        estimators: list, optional (default='all')
126            list of Estimators names or just 'all' for > 90 classifiers
127            (default='all')
128
129        preprocess: bool, preprocessing is done when set to True
130
131        n_jobs: int, when possible, run in parallel
132            For now, only used by individual models that support it.
133
134        n_layers: int, optional (default=3)
135            Number of layers of CustomClassifiers to be used.
136
137        All the other parameters are the same as CustomClassifier's.
138
139    Attributes:
140
141        models_: dict-object
142            Returns a dictionary with each model pipeline as value
143            with key as name of models.
144
145        best_model_: object
146            Returns the best model pipeline.
147
148    Examples
149
150        ```python
151        import nnetsauce as ns
152        from sklearn.datasets import load_breast_cancer
153        from sklearn.model_selection import train_test_split
154        data = load_breast_cancer()
155        X = data.data
156        y= data.target
157        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
158            random_state=123)
159        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
160        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
161        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
162        print(models)
163        ```
164
165    """
166
167    def __init__(
168        self,
169        verbose=0,
170        ignore_warnings=True,
171        custom_metric=None,
172        predictions=False,
173        sort_by="Accuracy",
174        random_state=42,
175        estimators="all",
176        preprocess=False,
177        n_jobs=None,
178        # Defining depth
179        n_layers=3,
180        # CustomClassifier attributes
181        obj=None,
182        n_hidden_features=5,
183        activation_name="relu",
184        a=0.01,
185        nodes_sim="sobol",
186        bias=True,
187        dropout=0,
188        direct_link=True,
189        n_clusters=2,
190        cluster_encode=True,
191        type_clust="kmeans",
192        type_scaling=("std", "std", "std"),
193        col_sample=1,
194        row_sample=1,
195        seed=123,
196        backend="cpu",
197    ):
198        self.verbose = verbose
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by
203        self.models_ = {}
204        self.best_model_ = None
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers - 1
209        self.n_jobs = n_jobs
210        super().__init__(
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            col_sample=col_sample,
224            row_sample=row_sample,
225            seed=seed,
226            backend=backend,
227        )
228
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training vectors, where rows is the number of samples
245                and columns is the number of features.
246
247            y_test: array-like,
248                Testing vectors, where rows is the number of samples
249                and columns is the number of features.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                if self.ignore_warnings is False:
359                    print(name + " model failed to execute")
360                    print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407            for name, model in tqdm(self.classifiers):  # do parallel exec
408                other_args = (
409                    {}
410                )  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                    if self.ignore_warnings is False:
547                        print(name + " model failed to execute")
548                        print(exception)
549
550        else:  # no preprocessing
551            for name, model in tqdm(self.classifiers):  # do parallel exec
552                start = time.time()
553                try:
554                    if "random_state" in model().get_params().keys():
555                        layer_clf = CustomClassifier(
556                            obj=model(random_state=self.random_state),
557                            n_hidden_features=self.n_hidden_features,
558                            activation_name=self.activation_name,
559                            a=self.a,
560                            nodes_sim=self.nodes_sim,
561                            bias=self.bias,
562                            dropout=self.dropout,
563                            direct_link=self.direct_link,
564                            n_clusters=self.n_clusters,
565                            cluster_encode=self.cluster_encode,
566                            type_clust=self.type_clust,
567                            type_scaling=self.type_scaling,
568                            col_sample=self.col_sample,
569                            row_sample=self.row_sample,
570                            seed=self.seed,
571                            backend=self.backend,
572                            cv_calibration=None,
573                        )
574
575                    else:
576                        layer_clf = CustomClassifier(
577                            obj=model(),
578                            n_hidden_features=self.n_hidden_features,
579                            activation_name=self.activation_name,
580                            a=self.a,
581                            nodes_sim=self.nodes_sim,
582                            bias=self.bias,
583                            dropout=self.dropout,
584                            direct_link=self.direct_link,
585                            n_clusters=self.n_clusters,
586                            cluster_encode=self.cluster_encode,
587                            type_clust=self.type_clust,
588                            type_scaling=self.type_scaling,
589                            col_sample=self.col_sample,
590                            row_sample=self.row_sample,
591                            seed=self.seed,
592                            backend=self.backend,
593                            cv_calibration=None,
594                        )
595
596                    layer_clf.fit(X_train, y_train)
597
598                    for _ in range(self.n_layers):
599                        layer_clf = deepcopy(
600                            CustomClassifier(
601                                obj=layer_clf,
602                                n_hidden_features=self.n_hidden_features,
603                                activation_name=self.activation_name,
604                                a=self.a,
605                                nodes_sim=self.nodes_sim,
606                                bias=self.bias,
607                                dropout=self.dropout,
608                                direct_link=self.direct_link,
609                                n_clusters=self.n_clusters,
610                                cluster_encode=self.cluster_encode,
611                                type_clust=self.type_clust,
612                                type_scaling=self.type_scaling,
613                                col_sample=self.col_sample,
614                                row_sample=self.row_sample,
615                                seed=self.seed,
616                                backend=self.backend,
617                                cv_calibration=None,
618                            )
619                        )
620
621                        # layer_clf.fit(X_train, y_train)
622
623                    layer_clf.fit(X_train, y_train)
624
625                    self.models_[name] = layer_clf
626                    y_pred = layer_clf.predict(X_test)
627                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
628                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
629                    f1 = f1_score(y_test, y_pred, average="weighted")
630                    try:
631                        roc_auc = roc_auc_score(y_test, y_pred)
632                    except Exception as exception:
633                        roc_auc = None
634                        if self.ignore_warnings is False:
635                            print("ROC AUC couldn't be calculated for " + name)
636                            print(exception)
637                    names.append(name)
638                    Accuracy.append(accuracy)
639                    B_Accuracy.append(b_accuracy)
640                    ROC_AUC.append(roc_auc)
641                    F1.append(f1)
642                    TIME.append(time.time() - start)
643                    if self.custom_metric is not None:
644                        custom_metric = self.custom_metric(y_test, y_pred)
645                        CUSTOM_METRIC.append(custom_metric)
646                    if self.verbose > 0:
647                        if self.custom_metric is not None:
648                            print(
649                                {
650                                    "Model": name,
651                                    "Accuracy": accuracy,
652                                    "Balanced Accuracy": b_accuracy,
653                                    "ROC AUC": roc_auc,
654                                    "F1 Score": f1,
655                                    self.custom_metric.__name__: custom_metric,
656                                    "Time taken": time.time() - start,
657                                }
658                            )
659                        else:
660                            print(
661                                {
662                                    "Model": name,
663                                    "Accuracy": accuracy,
664                                    "Balanced Accuracy": b_accuracy,
665                                    "ROC AUC": roc_auc,
666                                    "F1 Score": f1,
667                                    "Time taken": time.time() - start,
668                                }
669                            )
670                    if self.predictions:
671                        predictions[name] = y_pred
672                except Exception as exception:
673                    if self.ignore_warnings is False:
674                        print(name + " model failed to execute")
675                        print(exception)
676
677        if self.custom_metric is None:
678            scores = pd.DataFrame(
679                {
680                    "Model": names,
681                    "Accuracy": Accuracy,
682                    "Balanced Accuracy": B_Accuracy,
683                    "ROC AUC": ROC_AUC,
684                    "F1 Score": F1,
685                    "Time Taken": TIME,
686                }
687            )
688        else:
689            scores = pd.DataFrame(
690                {
691                    "Model": names,
692                    "Accuracy": Accuracy,
693                    "Balanced Accuracy": B_Accuracy,
694                    "ROC AUC": ROC_AUC,
695                    "F1 Score": F1,
696                    "Custom metric": CUSTOM_METRIC,
697                    "Time Taken": TIME,
698                }
699            )
700        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
701            "Model"
702        )
703
704        self.best_model_ = self.models_[scores.index[0]]
705
706        if self.predictions is True:
707            return scores, predictions
708
709        return scores
710
711    def get_best_model(self):
712        """
713        This function returns the best model pipeline based on the sort_by metric.
714
715        Returns:
716
717            best_model: object,
718                Returns the best model pipeline based on the sort_by metric.
719
720        """
721        return self.best_model_
722
723    def provide_models(self, X_train, X_test, y_train, y_test):
724        """Returns all the model objects trained. If fit hasn't been called yet,
725        then it's called to return the models.
726
727        Parameters:
728
729        X_train: array-like,
730            Training vectors, where rows is the number of samples
731            and columns is the number of features.
732
733        X_test: array-like,
734            Testing vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        y_train: array-like,
738            Training vectors, where rows is the number of samples
739            and columns is the number of features.
740
741        y_test: array-like,
742            Testing vectors, where rows is the number of samples
743            and columns is the number of features.
744
745        Returns:
746
747            models: dict-object,
748                Returns a dictionary with each model's pipeline as value
749                and key = name of the model.
750        """
751        if len(self.models_.keys()) == 0:
752            self.fit(X_train, X_test, y_train, y_test)
753
754        return self.models_

Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom
      evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are
    returned along with the scores, as a dictionary keyed by model name.

sort_by: string, optional (default='Accuracy')
    Sort models by a metric. Available options are 'Accuracy',
    'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
    identified by its name and provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' for > 90 classifiers
    (default='all')

preprocess: bool, preprocessing is done when set to True

n_jobs: int, when possible, run in parallel
    For now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomClassifiers to be used.

All the other parameters are the same as CustomClassifier's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline.

Examples

import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
    random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None, predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
229    def fit(self, X_train, X_test, y_train, y_test):
230        """Fit classifiers to X_train and y_train, predict and score on X_test,
231        y_test.
232
233        Parameters:
234
235            X_train: array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            X_test: array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243            y_train: array-like,
244                Training vectors, where rows is the number of samples
245                and columns is the number of features.
246
247            y_test: array-like,
248                Testing vectors, where rows is the number of samples
249                and columns is the number of features.
250
251        Returns:
252
253            scores: Pandas DataFrame
254                Returns metrics of all the models in a Pandas DataFrame.
255
256            predictions: Pandas DataFrame
257                Returns predictions of all the models in a Pandas DataFrame.
258        """
259        Accuracy = []
260        B_Accuracy = []
261        ROC_AUC = []
262        F1 = []
263        names = []
264        TIME = []
265        predictions = {}
266
267        if self.custom_metric is not None:
268            CUSTOM_METRIC = []
269
270        if isinstance(X_train, np.ndarray):
271            X_train = pd.DataFrame(X_train)
272            X_test = pd.DataFrame(X_test)
273
274        numeric_features = X_train.select_dtypes(include=[np.number]).columns
275        categorical_features = X_train.select_dtypes(include=["object"]).columns
276
277        categorical_low, categorical_high = get_card_split(
278            X_train, categorical_features
279        )
280
281        if self.preprocess is True:
282            preprocessor = ColumnTransformer(
283                transformers=[
284                    ("numeric", numeric_transformer, numeric_features),
285                    (
286                        "categorical_low",
287                        categorical_transformer_low,
288                        categorical_low,
289                    ),
290                    (
291                        "categorical_high",
292                        categorical_transformer_high,
293                        categorical_high,
294                    ),
295                ]
296            )
297
298        # baseline models
299        try:
300            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
301            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
302        except Exception as exception:
303            baseline_names = ["RandomForestClassifier"]
304            baseline_models = [RandomForestClassifier()]
305
306        for name, model in zip(baseline_names, baseline_models):
307            start = time.time()
308            try:
309                model.fit(X_train, y_train)
310                self.models_[name] = model
311                y_pred = model.predict(X_test)
312                accuracy = accuracy_score(y_test, y_pred, normalize=True)
313                b_accuracy = balanced_accuracy_score(y_test, y_pred)
314                f1 = f1_score(y_test, y_pred, average="weighted")
315                try:
316                    roc_auc = roc_auc_score(y_test, y_pred)
317                except Exception as exception:
318                    roc_auc = None
319                    if self.ignore_warnings is False:
320                        print("ROC AUC couldn't be calculated for " + name)
321                        print(exception)
322                names.append(name)
323                Accuracy.append(accuracy)
324                B_Accuracy.append(b_accuracy)
325                ROC_AUC.append(roc_auc)
326                F1.append(f1)
327                TIME.append(time.time() - start)
328                if self.custom_metric is not None:
329                    custom_metric = self.custom_metric(y_test, y_pred)
330                    CUSTOM_METRIC.append(custom_metric)
331                if self.verbose > 0:
332                    if self.custom_metric is not None:
333                        print(
334                            {
335                                "Model": name,
336                                "Accuracy": accuracy,
337                                "Balanced Accuracy": b_accuracy,
338                                "ROC AUC": roc_auc,
339                                "F1 Score": f1,
340                                self.custom_metric.__name__: custom_metric,
341                                "Time taken": time.time() - start,
342                            }
343                        )
344                    else:
345                        print(
346                            {
347                                "Model": name,
348                                "Accuracy": accuracy,
349                                "Balanced Accuracy": b_accuracy,
350                                "ROC AUC": roc_auc,
351                                "F1 Score": f1,
352                                "Time taken": time.time() - start,
353                            }
354                        )
355                if self.predictions:
356                    predictions[name] = y_pred
357            except Exception as exception:
358                if self.ignore_warnings is False:
359                    print(name + " model failed to execute")
360                    print(exception)
361
362        if self.estimators == "all":
363            self.classifiers = [
364                item
365                for sublist in [
366                    DEEPCLASSIFIERS,
367                    DEEPMULTITASKCLASSIFIERS,
368                    DEEPSIMPLEMULTITASKCLASSIFIERS,
369                ]
370                for item in sublist
371            ]
372        else:
373            self.classifiers = (
374                [
375                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
376                    for est in all_estimators()
377                    if (
378                        issubclass(est[1], ClassifierMixin)
379                        and (est[0] in self.estimators)
380                    )
381                ]
382                + [
383                    (
384                        "DeepMultitaskClassifier(" + est[0] + ")",
385                        partial(MultitaskClassifier, obj=est[1]()),
386                    )
387                    for est in all_estimators()
388                    if (
389                        issubclass(est[1], RegressorMixin)
390                        and (est[0] in self.estimators)
391                    )
392                ]
393                + [
394                    (
395                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
396                        partial(SimpleMultitaskClassifier, obj=est[1]()),
397                    )
398                    for est in all_estimators()
399                    if (
400                        issubclass(est[1], RegressorMixin)
401                        and (est[0] in self.estimators)
402                    )
403                ]
404            )
405
406        if self.preprocess is True:
407            for name, model in tqdm(self.classifiers):  # do parallel exec
408                other_args = (
409                    {}
410                )  # use this trick for `random_state` too --> refactor
411                try:
412                    if (
413                        "n_jobs" in model().get_params().keys()
414                        and name.find("LogisticRegression") == -1
415                    ):
416                        other_args["n_jobs"] = self.n_jobs
417                except Exception:
418                    pass
419
420                start = time.time()
421
422                try:
423                    if "random_state" in model().get_params().keys():
424                        layer_clf = CustomClassifier(
425                            obj=model(random_state=self.random_state),
426                            n_hidden_features=self.n_hidden_features,
427                            activation_name=self.activation_name,
428                            a=self.a,
429                            nodes_sim=self.nodes_sim,
430                            bias=self.bias,
431                            dropout=self.dropout,
432                            direct_link=self.direct_link,
433                            n_clusters=self.n_clusters,
434                            cluster_encode=self.cluster_encode,
435                            type_clust=self.type_clust,
436                            type_scaling=self.type_scaling,
437                            col_sample=self.col_sample,
438                            row_sample=self.row_sample,
439                            seed=self.seed,
440                            backend=self.backend,
441                            cv_calibration=None,
442                        )
443
444                    else:
445                        layer_clf = CustomClassifier(
446                            obj=model(),
447                            n_hidden_features=self.n_hidden_features,
448                            activation_name=self.activation_name,
449                            a=self.a,
450                            nodes_sim=self.nodes_sim,
451                            bias=self.bias,
452                            dropout=self.dropout,
453                            direct_link=self.direct_link,
454                            n_clusters=self.n_clusters,
455                            cluster_encode=self.cluster_encode,
456                            type_clust=self.type_clust,
457                            type_scaling=self.type_scaling,
458                            col_sample=self.col_sample,
459                            row_sample=self.row_sample,
460                            seed=self.seed,
461                            backend=self.backend,
462                            cv_calibration=None,
463                        )
464
465                    layer_clf.fit(X_train, y_train)
466
467                    for _ in range(self.n_layers):
468                        layer_clf = deepcopy(
469                            CustomClassifier(
470                                obj=layer_clf,
471                                n_hidden_features=self.n_hidden_features,
472                                activation_name=self.activation_name,
473                                a=self.a,
474                                nodes_sim=self.nodes_sim,
475                                bias=self.bias,
476                                dropout=self.dropout,
477                                direct_link=self.direct_link,
478                                n_clusters=self.n_clusters,
479                                cluster_encode=self.cluster_encode,
480                                type_clust=self.type_clust,
481                                type_scaling=self.type_scaling,
482                                col_sample=self.col_sample,
483                                row_sample=self.row_sample,
484                                seed=self.seed,
485                                backend=self.backend,
486                                cv_calibration=None,
487                            )
488                        )
489
490                    pipe = Pipeline(
491                        [
492                            ("preprocessor", preprocessor),
493                            ("classifier", layer_clf),
494                        ]
495                    )
496
497                    pipe.fit(X_train, y_train)
498                    self.models_[name] = pipe
499                    y_pred = pipe.predict(X_test)
500                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
501                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
502                    f1 = f1_score(y_test, y_pred, average="weighted")
503                    try:
504                        roc_auc = roc_auc_score(y_test, y_pred)
505                    except Exception as exception:
506                        roc_auc = None
507                        if self.ignore_warnings is False:
508                            print("ROC AUC couldn't be calculated for " + name)
509                            print(exception)
510                    names.append(name)
511                    Accuracy.append(accuracy)
512                    B_Accuracy.append(b_accuracy)
513                    ROC_AUC.append(roc_auc)
514                    F1.append(f1)
515                    TIME.append(time.time() - start)
516                    if self.custom_metric is not None:
517                        custom_metric = self.custom_metric(y_test, y_pred)
518                        CUSTOM_METRIC.append(custom_metric)
519                    if self.verbose > 0:
520                        if self.custom_metric is not None:
521                            print(
522                                {
523                                    "Model": name,
524                                    "Accuracy": accuracy,
525                                    "Balanced Accuracy": b_accuracy,
526                                    "ROC AUC": roc_auc,
527                                    "F1 Score": f1,
528                                    self.custom_metric.__name__: custom_metric,
529                                    "Time taken": time.time() - start,
530                                }
531                            )
532                        else:
533                            print(
534                                {
535                                    "Model": name,
536                                    "Accuracy": accuracy,
537                                    "Balanced Accuracy": b_accuracy,
538                                    "ROC AUC": roc_auc,
539                                    "F1 Score": f1,
540                                    "Time taken": time.time() - start,
541                                }
542                            )
543                    if self.predictions:
544                        predictions[name] = y_pred
545                except Exception as exception:
546                    if self.ignore_warnings is False:
547                        print(name + " model failed to execute")
548                        print(exception)
549
550        else:  # no preprocessing
551            for name, model in tqdm(self.classifiers):  # do parallel exec
552                start = time.time()
553                try:
554                    if "random_state" in model().get_params().keys():
555                        layer_clf = CustomClassifier(
556                            obj=model(random_state=self.random_state),
557                            n_hidden_features=self.n_hidden_features,
558                            activation_name=self.activation_name,
559                            a=self.a,
560                            nodes_sim=self.nodes_sim,
561                            bias=self.bias,
562                            dropout=self.dropout,
563                            direct_link=self.direct_link,
564                            n_clusters=self.n_clusters,
565                            cluster_encode=self.cluster_encode,
566                            type_clust=self.type_clust,
567                            type_scaling=self.type_scaling,
568                            col_sample=self.col_sample,
569                            row_sample=self.row_sample,
570                            seed=self.seed,
571                            backend=self.backend,
572                            cv_calibration=None,
573                        )
574
575                    else:
576                        layer_clf = CustomClassifier(
577                            obj=model(),
578                            n_hidden_features=self.n_hidden_features,
579                            activation_name=self.activation_name,
580                            a=self.a,
581                            nodes_sim=self.nodes_sim,
582                            bias=self.bias,
583                            dropout=self.dropout,
584                            direct_link=self.direct_link,
585                            n_clusters=self.n_clusters,
586                            cluster_encode=self.cluster_encode,
587                            type_clust=self.type_clust,
588                            type_scaling=self.type_scaling,
589                            col_sample=self.col_sample,
590                            row_sample=self.row_sample,
591                            seed=self.seed,
592                            backend=self.backend,
593                            cv_calibration=None,
594                        )
595
596                    layer_clf.fit(X_train, y_train)
597
598                    for _ in range(self.n_layers):
599                        layer_clf = deepcopy(
600                            CustomClassifier(
601                                obj=layer_clf,
602                                n_hidden_features=self.n_hidden_features,
603                                activation_name=self.activation_name,
604                                a=self.a,
605                                nodes_sim=self.nodes_sim,
606                                bias=self.bias,
607                                dropout=self.dropout,
608                                direct_link=self.direct_link,
609                                n_clusters=self.n_clusters,
610                                cluster_encode=self.cluster_encode,
611                                type_clust=self.type_clust,
612                                type_scaling=self.type_scaling,
613                                col_sample=self.col_sample,
614                                row_sample=self.row_sample,
615                                seed=self.seed,
616                                backend=self.backend,
617                                cv_calibration=None,
618                            )
619                        )
620
621                        # layer_clf.fit(X_train, y_train)
622
623                    layer_clf.fit(X_train, y_train)
624
625                    self.models_[name] = layer_clf
626                    y_pred = layer_clf.predict(X_test)
627                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
628                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
629                    f1 = f1_score(y_test, y_pred, average="weighted")
630                    try:
631                        roc_auc = roc_auc_score(y_test, y_pred)
632                    except Exception as exception:
633                        roc_auc = None
634                        if self.ignore_warnings is False:
635                            print("ROC AUC couldn't be calculated for " + name)
636                            print(exception)
637                    names.append(name)
638                    Accuracy.append(accuracy)
639                    B_Accuracy.append(b_accuracy)
640                    ROC_AUC.append(roc_auc)
641                    F1.append(f1)
642                    TIME.append(time.time() - start)
643                    if self.custom_metric is not None:
644                        custom_metric = self.custom_metric(y_test, y_pred)
645                        CUSTOM_METRIC.append(custom_metric)
646                    if self.verbose > 0:
647                        if self.custom_metric is not None:
648                            print(
649                                {
650                                    "Model": name,
651                                    "Accuracy": accuracy,
652                                    "Balanced Accuracy": b_accuracy,
653                                    "ROC AUC": roc_auc,
654                                    "F1 Score": f1,
655                                    self.custom_metric.__name__: custom_metric,
656                                    "Time taken": time.time() - start,
657                                }
658                            )
659                        else:
660                            print(
661                                {
662                                    "Model": name,
663                                    "Accuracy": accuracy,
664                                    "Balanced Accuracy": b_accuracy,
665                                    "ROC AUC": roc_auc,
666                                    "F1 Score": f1,
667                                    "Time taken": time.time() - start,
668                                }
669                            )
670                    if self.predictions:
671                        predictions[name] = y_pred
672                except Exception as exception:
673                    if self.ignore_warnings is False:
674                        print(name + " model failed to execute")
675                        print(exception)
676
677        if self.custom_metric is None:
678            scores = pd.DataFrame(
679                {
680                    "Model": names,
681                    "Accuracy": Accuracy,
682                    "Balanced Accuracy": B_Accuracy,
683                    "ROC AUC": ROC_AUC,
684                    "F1 Score": F1,
685                    "Time Taken": TIME,
686                }
687            )
688        else:
689            scores = pd.DataFrame(
690                {
691                    "Model": names,
692                    "Accuracy": Accuracy,
693                    "Balanced Accuracy": B_Accuracy,
694                    "ROC AUC": ROC_AUC,
695                    "F1 Score": F1,
696                    "Custom metric": CUSTOM_METRIC,
697                    "Time Taken": TIME,
698                }
699            )
700        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
701            "Model"
702        )
703
704        self.best_model_ = self.models_[scores.index[0]]
705
706        if self.predictions is True:
707            return scores, predictions
708
709        return scores

Fit classifiers to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train: array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test: array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train: array-like,
    Training target values (class labels), one per row of X_train.

y_test: array-like,
    Testing target values (class labels), one per row of X_test;
    the predictions are scored against them.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
def provide_models(self, X_train, X_test, y_train, y_test):
723    def provide_models(self, X_train, X_test, y_train, y_test):
724        """Returns all the model objects trained. If fit hasn't been called yet,
725        then it's called to return the models.
726
727        Parameters:
728
729        X_train: array-like,
730            Training vectors, where rows is the number of samples
731            and columns is the number of features.
732
733        X_test: array-like,
734            Testing vectors, where rows is the number of samples
735            and columns is the number of features.
736
737        y_train: array-like,
738            Training vectors, where rows is the number of samples
739            and columns is the number of features.
740
741        y_test: array-like,
742            Testing vectors, where rows is the number of samples
743            and columns is the number of features.
744
745        Returns:
746
747            models: dict-object,
748                Returns a dictionary with each model's pipeline as value
749                and key = name of the model.
750        """
751        if len(self.models_.keys()) == 0:
752            self.fit(X_train, X_test, y_train, y_test)
753
754        return self.models_

Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.

Parameters:

X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.

X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.

y_train: array-like, Training target values (class labels), one per row of X_train.

y_test: array-like, Testing target values (class labels), one per row of X_test.

Returns:

models: dict-object,
    Returns a dictionary with each model's pipeline as value
    and key = name of the model.
class LazyDeepRegressor(nnetsauce.custom.custom.Custom, sklearn.base.RegressorMixin):
 90class LazyDeepRegressor(Custom, RegressorMixin):
 91    """
 92        Fitting -- almost -- all the regression algorithms with layers of
 93        nnetsauce's CustomRegressor and returning their scores.
 94
 95    Parameters:
 96
 97        verbose: int, optional (default=0)
 98            Any positive number for verbosity.
 99
100        ignore_warnings: bool, optional (default=True)
101            When set to True, the warning related to algorigms that are not able to run are ignored.
102
103        custom_metric: function, optional (default=None)
104            When function is provided, models are evaluated based on the custom evaluation metric provided.
105
106        predictions: bool, optional (default=False)
107            When set to True, the predictions of all the models models are returned as dataframe.
108
109        sort_by: string, optional (default='RMSE')
110            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'.
111            or a custom metric identified by its name and provided by custom_metric.
112
113        random_state: int, optional (default=42)
114            Reproducibiility seed.
115
116        estimators: list, optional (default='all')
117            list of Estimators names or just 'all' (default='all')
118
119        preprocess: bool
120            preprocessing is done when set to True
121
122        n_jobs : int, when possible, run in parallel
123            For now, only used by individual models that support it.
124
125        n_layers: int, optional (default=3)
126            Number of layers of CustomRegressors to be used.
127
128        All the other parameters are the same as CustomRegressor's.
129
130    Attributes:
131
132        models_: dict-object
133            Returns a dictionary with each model pipeline as value
134            with key as name of models.
135
136        best_model_: object
137            Returns the best model pipeline based on the sort_by metric.
138
139    Examples:
140
141        import nnetsauce as ns
142        import numpy as np
143        from sklearn import datasets
144        from sklearn.utils import shuffle
145
146        diabetes = datasets.load_diabetes()
147        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
148        X = X.astype(np.float32)
149
150        offset = int(X.shape[0] * 0.9)
151        X_train, y_train = X[:offset], y[:offset]
152        X_test, y_test = X[offset:], y[offset:]
153
154        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
155        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
156        print(models)
157
158    """
159
160    def __init__(
161        self,
162        verbose=0,
163        ignore_warnings=True,
164        custom_metric=None,
165        predictions=False,
166        sort_by="RMSE",
167        random_state=42,
168        estimators="all",
169        preprocess=False,
170        n_jobs=None,
171        # Defining depth
172        n_layers=3,
173        # CustomRegressor attributes
174        obj=None,
175        n_hidden_features=5,
176        activation_name="relu",
177        a=0.01,
178        nodes_sim="sobol",
179        bias=True,
180        dropout=0,
181        direct_link=True,
182        n_clusters=2,
183        cluster_encode=True,
184        type_clust="kmeans",
185        type_scaling=("std", "std", "std"),
186        col_sample=1,
187        row_sample=1,
188        seed=123,
189        backend="cpu",
190    ):
191        self.verbose = verbose
192        self.ignore_warnings = ignore_warnings
193        self.custom_metric = custom_metric
194        self.predictions = predictions
195        self.sort_by = sort_by
196        self.models_ = {}
197        self.best_model_ = None
198        self.random_state = random_state
199        self.estimators = estimators
200        self.preprocess = preprocess
201        self.n_layers = n_layers - 1
202        self.n_jobs = n_jobs
203        super().__init__(
204            obj=obj,
205            n_hidden_features=n_hidden_features,
206            activation_name=activation_name,
207            a=a,
208            nodes_sim=nodes_sim,
209            bias=bias,
210            dropout=dropout,
211            direct_link=direct_link,
212            n_clusters=n_clusters,
213            cluster_encode=cluster_encode,
214            type_clust=type_clust,
215            type_scaling=type_scaling,
216            col_sample=col_sample,
217            row_sample=row_sample,
218            seed=seed,
219            backend=backend,
220        )
221
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            y_test : array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[self.custom_metric.__name__] = (
332                            custom_metric
333                        )
334
335                    print(scores_verbose)
336                if self.predictions:
337                    predictions[name] = y_pred
338            except Exception as exception:
339                if self.ignore_warnings is False:
340                    print(name + " model failed to execute")
341                    print(exception)
342
343        if self.estimators == "all":
344            self.regressors = DEEPREGRESSORS
345        else:
346            self.regressors = [
347                ("DeepCustomRegressor(" + est[0] + ")", est[1])
348                for est in all_estimators()
349                if (
350                    issubclass(est[1], RegressorMixin)
351                    and (est[0] in self.estimators)
352                )
353            ]
354
355        if self.preprocess is True:
356            for name, model in tqdm(self.regressors):  # do parallel exec
357                start = time.time()
358                try:
359                    if "random_state" in model().get_params().keys():
360                        layer_regr = CustomRegressor(
361                            obj=model(random_state=self.random_state),
362                            n_hidden_features=self.n_hidden_features,
363                            activation_name=self.activation_name,
364                            a=self.a,
365                            nodes_sim=self.nodes_sim,
366                            bias=self.bias,
367                            dropout=self.dropout,
368                            direct_link=self.direct_link,
369                            n_clusters=self.n_clusters,
370                            cluster_encode=self.cluster_encode,
371                            type_clust=self.type_clust,
372                            type_scaling=self.type_scaling,
373                            col_sample=self.col_sample,
374                            row_sample=self.row_sample,
375                            seed=self.seed,
376                            backend=self.backend,
377                        )
378                    else:
379                        layer_regr = CustomRegressor(
380                            obj=model(),
381                            n_hidden_features=self.n_hidden_features,
382                            activation_name=self.activation_name,
383                            a=self.a,
384                            nodes_sim=self.nodes_sim,
385                            bias=self.bias,
386                            dropout=self.dropout,
387                            direct_link=self.direct_link,
388                            n_clusters=self.n_clusters,
389                            cluster_encode=self.cluster_encode,
390                            type_clust=self.type_clust,
391                            type_scaling=self.type_scaling,
392                            col_sample=self.col_sample,
393                            row_sample=self.row_sample,
394                            seed=self.seed,
395                            backend=self.backend,
396                        )
397
398                    for _ in range(self.n_layers):
399                        layer_regr = deepcopy(
400                            CustomRegressor(
401                                obj=layer_regr,
402                                n_hidden_features=self.n_hidden_features,
403                                activation_name=self.activation_name,
404                                a=self.a,
405                                nodes_sim=self.nodes_sim,
406                                bias=self.bias,
407                                dropout=self.dropout,
408                                direct_link=self.direct_link,
409                                n_clusters=self.n_clusters,
410                                cluster_encode=self.cluster_encode,
411                                type_clust=self.type_clust,
412                                type_scaling=self.type_scaling,
413                                col_sample=self.col_sample,
414                                row_sample=self.row_sample,
415                                seed=self.seed,
416                                backend=self.backend,
417                            )
418                        )
419
420                    layer_regr.fit(X_train, y_train)
421
422                    pipe = Pipeline(
423                        steps=[
424                            ("preprocessor", preprocessor),
425                            ("regressor", layer_regr),
426                        ]
427                    )
428
429                    pipe.fit(X_train, y_train)
430
431                    self.models_[name] = pipe
432                    y_pred = pipe.predict(X_test)
433                    r_squared = r2_score(y_test, y_pred)
434                    adj_rsquared = adjusted_rsquared(
435                        r_squared, X_test.shape[0], X_test.shape[1]
436                    )
437                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
438
439                    names.append(name)
440                    R2.append(r_squared)
441                    ADJR2.append(adj_rsquared)
442                    RMSE.append(rmse)
443                    TIME.append(time.time() - start)
444
445                    if self.custom_metric:
446                        custom_metric = self.custom_metric(y_test, y_pred)
447                        CUSTOM_METRIC.append(custom_metric)
448
449                    if self.verbose > 0:
450                        scores_verbose = {
451                            "Model": name,
452                            "R-Squared": r_squared,
453                            "Adjusted R-Squared": adj_rsquared,
454                            "RMSE": rmse,
455                            "Time taken": time.time() - start,
456                        }
457
458                        if self.custom_metric:
459                            scores_verbose[self.custom_metric.__name__] = (
460                                custom_metric
461                            )
462
463                        print(scores_verbose)
464                    if self.predictions:
465                        predictions[name] = y_pred
466                except Exception as exception:
467                    if self.ignore_warnings is False:
468                        print(name + " model failed to execute")
469                        print(exception)
470
471        else:  # no preprocessing
472            for name, model in tqdm(self.regressors):  # do parallel exec
473                start = time.time()
474                try:
475                    if "random_state" in model().get_params().keys():
476                        layer_regr = CustomRegressor(
477                            obj=model(random_state=self.random_state),
478                            n_hidden_features=self.n_hidden_features,
479                            activation_name=self.activation_name,
480                            a=self.a,
481                            nodes_sim=self.nodes_sim,
482                            bias=self.bias,
483                            dropout=self.dropout,
484                            direct_link=self.direct_link,
485                            n_clusters=self.n_clusters,
486                            cluster_encode=self.cluster_encode,
487                            type_clust=self.type_clust,
488                            type_scaling=self.type_scaling,
489                            col_sample=self.col_sample,
490                            row_sample=self.row_sample,
491                            seed=self.seed,
492                            backend=self.backend,
493                        )
494                    else:
495                        layer_regr = CustomRegressor(
496                            obj=model(),
497                            n_hidden_features=self.n_hidden_features,
498                            activation_name=self.activation_name,
499                            a=self.a,
500                            nodes_sim=self.nodes_sim,
501                            bias=self.bias,
502                            dropout=self.dropout,
503                            direct_link=self.direct_link,
504                            n_clusters=self.n_clusters,
505                            cluster_encode=self.cluster_encode,
506                            type_clust=self.type_clust,
507                            type_scaling=self.type_scaling,
508                            col_sample=self.col_sample,
509                            row_sample=self.row_sample,
510                            seed=self.seed,
511                            backend=self.backend,
512                        )
513
514                    layer_regr.fit(X_train, y_train)
515
516                    for _ in range(self.n_layers):
517                        layer_regr = deepcopy(
518                            CustomRegressor(
519                                obj=layer_regr,
520                                n_hidden_features=self.n_hidden_features,
521                                activation_name=self.activation_name,
522                                a=self.a,
523                                nodes_sim=self.nodes_sim,
524                                bias=self.bias,
525                                dropout=self.dropout,
526                                direct_link=self.direct_link,
527                                n_clusters=self.n_clusters,
528                                cluster_encode=self.cluster_encode,
529                                type_clust=self.type_clust,
530                                type_scaling=self.type_scaling,
531                                col_sample=self.col_sample,
532                                row_sample=self.row_sample,
533                                seed=self.seed,
534                                backend=self.backend,
535                            )
536                        )
537
538                        # layer_regr.fit(X_train, y_train)
539
540                    layer_regr.fit(X_train, y_train)
541
542                    self.models_[name] = layer_regr
543                    y_pred = layer_regr.predict(X_test)
544
545                    r_squared = r2_score(y_test, y_pred)
546                    adj_rsquared = adjusted_rsquared(
547                        r_squared, X_test.shape[0], X_test.shape[1]
548                    )
549                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
550
551                    names.append(name)
552                    R2.append(r_squared)
553                    ADJR2.append(adj_rsquared)
554                    RMSE.append(rmse)
555                    TIME.append(time.time() - start)
556
557                    if self.custom_metric:
558                        custom_metric = self.custom_metric(y_test, y_pred)
559                        CUSTOM_METRIC.append(custom_metric)
560
561                    if self.verbose > 0:
562                        scores_verbose = {
563                            "Model": name,
564                            "R-Squared": r_squared,
565                            "Adjusted R-Squared": adj_rsquared,
566                            "RMSE": rmse,
567                            "Time taken": time.time() - start,
568                        }
569
570                        if self.custom_metric:
571                            scores_verbose[self.custom_metric.__name__] = (
572                                custom_metric
573                            )
574
575                        print(scores_verbose)
576                    if self.predictions:
577                        predictions[name] = y_pred
578                except Exception as exception:
579                    if self.ignore_warnings is False:
580                        print(name + " model failed to execute")
581                        print(exception)
582
583        scores = {
584            "Model": names,
585            "Adjusted R-Squared": ADJR2,
586            "R-Squared": R2,
587            "RMSE": RMSE,
588            "Time Taken": TIME,
589        }
590
591        if self.custom_metric:
592            scores["Custom metric"] = CUSTOM_METRIC
593
594        scores = pd.DataFrame(scores)
595        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
596            "Model"
597        )
598
599        self.best_model_ = self.models_[scores.index[0]]
600
601        if self.predictions is True:
602            return scores, predictions
603
604        return scores
605
606    def get_best_model(self):
607        """
608        This function returns the best model pipeline based on the sort_by metric.
609
610        Returns:
611
612            best_model: object,
613                Returns the best model pipeline based on the sort_by metric.
614
615        """
616        return self.best_model_
617
618    def provide_models(self, X_train, X_test, y_train, y_test):
619        """
620        This function returns all the model objects trained in fit function.
621        If fit is not called already, then we call fit and then return the models.
622
623        Parameters:
624
625            X_train : array-like,
626                Training vectors, where rows is the number of samples
627                and columns is the number of features.
628
629            X_test : array-like,
630                Testing vectors, where rows is the number of samples
631                and columns is the number of features.
632
633            y_train : array-like,
634                Training vectors, where rows is the number of samples
635                and columns is the number of features.
636
637            y_test : array-like,
638                Testing vectors, where rows is the number of samples
639                and columns is the number of features.
640
641        Returns:
642
643            models: dict-object,
644                Returns a dictionary with each model pipeline as value
645                with key as name of models.
646
647        """
648        if len(self.models_.keys()) == 0:
649            self.fit(X_train, X_test, y_train, y_test)
650
651        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, the warnings related to algorithms that are not able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned alongside the scores.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and,
    when custom_metric is provided, 'Custom metric' (the column that holds the custom metric's values).

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators names or just 'all' (default='all')

preprocess: bool
    preprocessing is done when set to True

n_jobs : int, when possible, run in parallel
    For now, only used by individual models that support it.

n_layers: int, optional (default=3)
    Number of layers of CustomRegressors to be used.

All the other parameters are the same as CustomRegressor's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle

diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
222    def fit(self, X_train, X_test, y_train, y_test):
223        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
224
225        Parameters:
226
227            X_train : array-like,
228                Training vectors, where rows is the number of samples
229                and columns is the number of features.
230
231            X_test : array-like,
232                Testing vectors, where rows is the number of samples
233                and columns is the number of features.
234
235            y_train : array-like,
236                Training vectors, where rows is the number of samples
237                and columns is the number of features.
238
239            y_test : array-like,
240                Testing vectors, where rows is the number of samples
241                and columns is the number of features.
242
243        Returns:
244        -------
245        scores:  Pandas DataFrame
246            Returns metrics of all the models in a Pandas DataFrame.
247
248        predictions : Pandas DataFrame
249            Returns predictions of all the models in a Pandas DataFrame.
250
251        """
252        R2 = []
253        ADJR2 = []
254        RMSE = []
255        # WIN = []
256        names = []
257        TIME = []
258        predictions = {}
259
260        if self.custom_metric:
261            CUSTOM_METRIC = []
262
263        if isinstance(X_train, np.ndarray):
264            X_train = pd.DataFrame(X_train)
265            X_test = pd.DataFrame(X_test)
266
267        numeric_features = X_train.select_dtypes(include=[np.number]).columns
268        categorical_features = X_train.select_dtypes(include=["object"]).columns
269
270        categorical_low, categorical_high = get_card_split(
271            X_train, categorical_features
272        )
273
274        if self.preprocess is True:
275            preprocessor = ColumnTransformer(
276                transformers=[
277                    ("numeric", numeric_transformer, numeric_features),
278                    (
279                        "categorical_low",
280                        categorical_transformer_low,
281                        categorical_low,
282                    ),
283                    (
284                        "categorical_high",
285                        categorical_transformer_high,
286                        categorical_high,
287                    ),
288                ]
289            )
290
291        # base models
292        try:
293            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
294            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
295        except Exception as exception:
296            baseline_names = ["RandomForestRegressor"]
297            baseline_models = [RandomForestRegressor()]
298
299        for name, model in zip(baseline_names, baseline_models):
300            start = time.time()
301            try:
302                model.fit(X_train, y_train)
303                self.models_[name] = model
304                y_pred = model.predict(X_test)
305                r_squared = r2_score(y_test, y_pred)
306                adj_rsquared = adjusted_rsquared(
307                    r_squared, X_test.shape[0], X_test.shape[1]
308                )
309                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
310
311                names.append(name)
312                R2.append(r_squared)
313                ADJR2.append(adj_rsquared)
314                RMSE.append(rmse)
315                TIME.append(time.time() - start)
316
317                if self.custom_metric:
318                    custom_metric = self.custom_metric(y_test, y_pred)
319                    CUSTOM_METRIC.append(custom_metric)
320
321                if self.verbose > 0:
322                    scores_verbose = {
323                        "Model": name,
324                        "R-Squared": r_squared,
325                        "Adjusted R-Squared": adj_rsquared,
326                        "RMSE": rmse,
327                        "Time taken": time.time() - start,
328                    }
329
330                    if self.custom_metric:
331                        scores_verbose[self.custom_metric.__name__] = (
332                            custom_metric
333                        )
334
335                    print(scores_verbose)
336                if self.predictions:
337                    predictions[name] = y_pred
338            except Exception as exception:
339                if self.ignore_warnings is False:
340                    print(name + " model failed to execute")
341                    print(exception)
342
343        if self.estimators == "all":
344            self.regressors = DEEPREGRESSORS
345        else:
346            self.regressors = [
347                ("DeepCustomRegressor(" + est[0] + ")", est[1])
348                for est in all_estimators()
349                if (
350                    issubclass(est[1], RegressorMixin)
351                    and (est[0] in self.estimators)
352                )
353            ]
354
355        if self.preprocess is True:
356            for name, model in tqdm(self.regressors):  # do parallel exec
357                start = time.time()
358                try:
359                    if "random_state" in model().get_params().keys():
360                        layer_regr = CustomRegressor(
361                            obj=model(random_state=self.random_state),
362                            n_hidden_features=self.n_hidden_features,
363                            activation_name=self.activation_name,
364                            a=self.a,
365                            nodes_sim=self.nodes_sim,
366                            bias=self.bias,
367                            dropout=self.dropout,
368                            direct_link=self.direct_link,
369                            n_clusters=self.n_clusters,
370                            cluster_encode=self.cluster_encode,
371                            type_clust=self.type_clust,
372                            type_scaling=self.type_scaling,
373                            col_sample=self.col_sample,
374                            row_sample=self.row_sample,
375                            seed=self.seed,
376                            backend=self.backend,
377                        )
378                    else:
379                        layer_regr = CustomRegressor(
380                            obj=model(),
381                            n_hidden_features=self.n_hidden_features,
382                            activation_name=self.activation_name,
383                            a=self.a,
384                            nodes_sim=self.nodes_sim,
385                            bias=self.bias,
386                            dropout=self.dropout,
387                            direct_link=self.direct_link,
388                            n_clusters=self.n_clusters,
389                            cluster_encode=self.cluster_encode,
390                            type_clust=self.type_clust,
391                            type_scaling=self.type_scaling,
392                            col_sample=self.col_sample,
393                            row_sample=self.row_sample,
394                            seed=self.seed,
395                            backend=self.backend,
396                        )
397
398                    for _ in range(self.n_layers):
399                        layer_regr = deepcopy(
400                            CustomRegressor(
401                                obj=layer_regr,
402                                n_hidden_features=self.n_hidden_features,
403                                activation_name=self.activation_name,
404                                a=self.a,
405                                nodes_sim=self.nodes_sim,
406                                bias=self.bias,
407                                dropout=self.dropout,
408                                direct_link=self.direct_link,
409                                n_clusters=self.n_clusters,
410                                cluster_encode=self.cluster_encode,
411                                type_clust=self.type_clust,
412                                type_scaling=self.type_scaling,
413                                col_sample=self.col_sample,
414                                row_sample=self.row_sample,
415                                seed=self.seed,
416                                backend=self.backend,
417                            )
418                        )
419
420                    layer_regr.fit(X_train, y_train)
421
422                    pipe = Pipeline(
423                        steps=[
424                            ("preprocessor", preprocessor),
425                            ("regressor", layer_regr),
426                        ]
427                    )
428
429                    pipe.fit(X_train, y_train)
430
431                    self.models_[name] = pipe
432                    y_pred = pipe.predict(X_test)
433                    r_squared = r2_score(y_test, y_pred)
434                    adj_rsquared = adjusted_rsquared(
435                        r_squared, X_test.shape[0], X_test.shape[1]
436                    )
437                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
438
439                    names.append(name)
440                    R2.append(r_squared)
441                    ADJR2.append(adj_rsquared)
442                    RMSE.append(rmse)
443                    TIME.append(time.time() - start)
444
445                    if self.custom_metric:
446                        custom_metric = self.custom_metric(y_test, y_pred)
447                        CUSTOM_METRIC.append(custom_metric)
448
449                    if self.verbose > 0:
450                        scores_verbose = {
451                            "Model": name,
452                            "R-Squared": r_squared,
453                            "Adjusted R-Squared": adj_rsquared,
454                            "RMSE": rmse,
455                            "Time taken": time.time() - start,
456                        }
457
458                        if self.custom_metric:
459                            scores_verbose[self.custom_metric.__name__] = (
460                                custom_metric
461                            )
462
463                        print(scores_verbose)
464                    if self.predictions:
465                        predictions[name] = y_pred
466                except Exception as exception:
467                    if self.ignore_warnings is False:
468                        print(name + " model failed to execute")
469                        print(exception)
470
471        else:  # no preprocessing
472            for name, model in tqdm(self.regressors):  # do parallel exec
473                start = time.time()
474                try:
475                    if "random_state" in model().get_params().keys():
476                        layer_regr = CustomRegressor(
477                            obj=model(random_state=self.random_state),
478                            n_hidden_features=self.n_hidden_features,
479                            activation_name=self.activation_name,
480                            a=self.a,
481                            nodes_sim=self.nodes_sim,
482                            bias=self.bias,
483                            dropout=self.dropout,
484                            direct_link=self.direct_link,
485                            n_clusters=self.n_clusters,
486                            cluster_encode=self.cluster_encode,
487                            type_clust=self.type_clust,
488                            type_scaling=self.type_scaling,
489                            col_sample=self.col_sample,
490                            row_sample=self.row_sample,
491                            seed=self.seed,
492                            backend=self.backend,
493                        )
494                    else:
495                        layer_regr = CustomRegressor(
496                            obj=model(),
497                            n_hidden_features=self.n_hidden_features,
498                            activation_name=self.activation_name,
499                            a=self.a,
500                            nodes_sim=self.nodes_sim,
501                            bias=self.bias,
502                            dropout=self.dropout,
503                            direct_link=self.direct_link,
504                            n_clusters=self.n_clusters,
505                            cluster_encode=self.cluster_encode,
506                            type_clust=self.type_clust,
507                            type_scaling=self.type_scaling,
508                            col_sample=self.col_sample,
509                            row_sample=self.row_sample,
510                            seed=self.seed,
511                            backend=self.backend,
512                        )
513
514                    layer_regr.fit(X_train, y_train)
515
516                    for _ in range(self.n_layers):
517                        layer_regr = deepcopy(
518                            CustomRegressor(
519                                obj=layer_regr,
520                                n_hidden_features=self.n_hidden_features,
521                                activation_name=self.activation_name,
522                                a=self.a,
523                                nodes_sim=self.nodes_sim,
524                                bias=self.bias,
525                                dropout=self.dropout,
526                                direct_link=self.direct_link,
527                                n_clusters=self.n_clusters,
528                                cluster_encode=self.cluster_encode,
529                                type_clust=self.type_clust,
530                                type_scaling=self.type_scaling,
531                                col_sample=self.col_sample,
532                                row_sample=self.row_sample,
533                                seed=self.seed,
534                                backend=self.backend,
535                            )
536                        )
537
538                        # layer_regr.fit(X_train, y_train)
539
540                    layer_regr.fit(X_train, y_train)
541
542                    self.models_[name] = layer_regr
543                    y_pred = layer_regr.predict(X_test)
544
545                    r_squared = r2_score(y_test, y_pred)
546                    adj_rsquared = adjusted_rsquared(
547                        r_squared, X_test.shape[0], X_test.shape[1]
548                    )
549                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
550
551                    names.append(name)
552                    R2.append(r_squared)
553                    ADJR2.append(adj_rsquared)
554                    RMSE.append(rmse)
555                    TIME.append(time.time() - start)
556
557                    if self.custom_metric:
558                        custom_metric = self.custom_metric(y_test, y_pred)
559                        CUSTOM_METRIC.append(custom_metric)
560
561                    if self.verbose > 0:
562                        scores_verbose = {
563                            "Model": name,
564                            "R-Squared": r_squared,
565                            "Adjusted R-Squared": adj_rsquared,
566                            "RMSE": rmse,
567                            "Time taken": time.time() - start,
568                        }
569
570                        if self.custom_metric:
571                            scores_verbose[self.custom_metric.__name__] = (
572                                custom_metric
573                            )
574
575                        print(scores_verbose)
576                    if self.predictions:
577                        predictions[name] = y_pred
578                except Exception as exception:
579                    if self.ignore_warnings is False:
580                        print(name + " model failed to execute")
581                        print(exception)
582
583        scores = {
584            "Model": names,
585            "Adjusted R-Squared": ADJR2,
586            "R-Squared": R2,
587            "RMSE": RMSE,
588            "Time Taken": TIME,
589        }
590
591        if self.custom_metric:
592            scores["Custom metric"] = CUSTOM_METRIC
593
594        scores = pd.DataFrame(scores)
595        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
596            "Model"
597        )
598
599        self.best_model_ = self.models_[scores.index[0]]
600
601        if self.predictions is True:
602            return scores, predictions
603
604        return scores

Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train : array-like,
    Training target values, one entry per row of X_train.

y_test : array-like,
    Testing target values, one entry per row of X_test.

Returns:

scores: Pandas DataFrame Returns metrics of all the models in a Pandas DataFrame.

predictions : dict, returned only when `predictions=True`. Maps each model name to its predictions on X_test.

def provide_models(self, X_train, X_test, y_train, y_test):
618    def provide_models(self, X_train, X_test, y_train, y_test):
619        """
620        This function returns all the model objects trained in fit function.
621        If fit is not called already, then we call fit and then return the models.
622
623        Parameters:
624
625            X_train : array-like,
626                Training vectors, where rows is the number of samples
627                and columns is the number of features.
628
629            X_test : array-like,
630                Testing vectors, where rows is the number of samples
631                and columns is the number of features.
632
633            y_train : array-like,
634                Training vectors, where rows is the number of samples
635                and columns is the number of features.
636
637            y_test : array-like,
638                Testing vectors, where rows is the number of samples
639                and columns is the number of features.
640
641        Returns:
642
643            models: dict-object,
644                Returns a dictionary with each model pipeline as value
645                with key as name of models.
646
647        """
648        if len(self.models_.keys()) == 0:
649            self.fit(X_train, X_test, y_train, y_test)
650
651        return self.models_

This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

y_train : array-like,
    Training target values, one entry per row of X_train.

y_test : array-like,
    Testing target values, one entry per row of X_test.

Returns:

models: dict-object,
    Returns a dictionary with each model pipeline as value
    with key as name of models.
class LazyMTS(nnetsauce.LazyDeepMTS):
 998class LazyMTS(LazyDeepMTS):
 999    """
1000    Fitting -- almost -- all the regression algorithms to multivariate time series
1001    and returning their scores (no layers).
1002
1003    Parameters:
1004
1005        verbose: int, optional (default=0)
1006            Any positive number for verbosity.
1007
1008        ignore_warnings: bool, optional (default=True)
1009            When set to True, the warning related to algorigms that are not
1010            able to run are ignored.
1011
1012        custom_metric: function, optional (default=None)
1013            When function is provided, models are evaluated based on the custom
1014              evaluation metric provided.
1015
1016        predictions: bool, optional (default=False)
1017            When set to True, the predictions of all the models models are returned as dataframe.
1018
1019        sort_by: string, optional (default='RMSE')
1020            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
1021            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
1022            provided by custom_metric.
1023
1024        random_state: int, optional (default=42)
1025            Reproducibiility seed.
1026
1027        estimators: list, optional (default='all')
1028            list of Estimators (regression algorithms) names or just 'all' (default='all')
1029
1030        preprocess: bool, preprocessing is done when set to True
1031
1032        h: int, optional (default=None)
1033            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
1034
1035        All the other parameters are the same as MTS's.
1036
1037    Attributes:
1038
1039        models_: dict-object
1040            Returns a dictionary with each model pipeline as value
1041            with key as name of models.
1042
1043        best_model_: object
1044            Returns the best model pipeline based on the sort_by metric.
1045
1046    Examples:
1047
1048        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
1049
1050    """
1051
1052    def __init__(
1053        self,
1054        verbose=0,
1055        ignore_warnings=True,
1056        custom_metric=None,
1057        predictions=False,
1058        sort_by=None,  # leave it as is
1059        random_state=42,
1060        estimators="all",
1061        preprocess=False,
1062        h=None,
1063        # MTS attributes
1064        obj=None,
1065        n_hidden_features=5,
1066        activation_name="relu",
1067        a=0.01,
1068        nodes_sim="sobol",
1069        bias=True,
1070        dropout=0,
1071        direct_link=True,
1072        n_clusters=2,
1073        cluster_encode=True,
1074        type_clust="kmeans",
1075        type_scaling=("std", "std", "std"),
1076        lags=15,
1077        type_pi="scp2-kde",
1078        block_size=None,
1079        replications=None,
1080        kernel=None,
1081        agg="mean",
1082        seed=123,
1083        backend="cpu",
1084        show_progress=False,
1085    ):
1086        super().__init__(
1087            verbose=verbose,
1088            ignore_warnings=ignore_warnings,
1089            custom_metric=custom_metric,
1090            predictions=predictions,
1091            sort_by=sort_by,
1092            random_state=random_state,
1093            estimators=estimators,
1094            preprocess=preprocess,
1095            n_layers=1,
1096            h=h,
1097            obj=obj,
1098            n_hidden_features=n_hidden_features,
1099            activation_name=activation_name,
1100            a=a,
1101            nodes_sim=nodes_sim,
1102            bias=bias,
1103            dropout=dropout,
1104            direct_link=direct_link,
1105            n_clusters=n_clusters,
1106            cluster_encode=cluster_encode,
1107            type_clust=type_clust,
1108            type_scaling=type_scaling,
1109            lags=lags,
1110            type_pi=type_pi,
1111            block_size=block_size,
1112            replications=replications,
1113            kernel=kernel,
1114            agg=agg,
1115            seed=seed,
1116            backend=backend,
1117            show_progress=show_progress,
1118        )

Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom
      evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned as a dataframe.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators (regression algorithms) names or just 'all' (default='all')

preprocess: bool, preprocessing is done when set to True

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
class LazyDeepMTS(nnetsauce.MTS):
104class LazyDeepMTS(MTS):
105    """
106
107    Fitting -- almost -- all the regression algorithms with layers of
108    nnetsauce's CustomRegressor to multivariate time series
109    and returning their scores.
110
111    Parameters:
112
113        verbose: int, optional (default=0)
114            Any positive number for verbosity.
115
116        ignore_warnings: bool, optional (default=True)
117            When set to True, the warnings related to algorithms that are not
118            able to run are ignored.
119
120        custom_metric: function, optional (default=None)
121            When function is provided, models are evaluated based on the custom
122              evaluation metric provided.
123
124        predictions: bool, optional (default=False)
125            When set to True, the predictions of all the models are returned as a data frame.
126
127        sort_by: string, optional (default=None)
128            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
129            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
130            provided by custom_metric. When None, 'WINKLERSCORE' is used if `replications` is not None or `type_pi` is 'gaussian', and 'RMSE' otherwise.
131
132        random_state: int, optional (default=42)
133            Reproducibility seed.
134
135        estimators: list, optional (default='all')
136            list of Estimators (regression algorithms) names or just 'all' (default='all')
137
138        preprocess: bool, preprocessing is done when set to True
139
140        n_layers: int, optional (default=1)
141            Number of layers in the network. When set to 1, the model is equivalent to a MTS.
142
143        h: int, optional (default=None)
144            Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
145
146        All the other parameters are the same as MTS's.
147
148    Attributes:
149
150        models_: dict-object
151            Returns a dictionary with each model pipeline as value
152            with key as name of models.
153
154        best_model_: object
155            Returns the best model pipeline based on the sort_by metric.
156
157    Examples:
158
159        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
160
161    """
162
163    def __init__(
164        self,
165        verbose=0,
166        ignore_warnings=True,
167        custom_metric=None,
168        predictions=False,
169        sort_by=None,  # leave it as is: None is resolved to a metric name at the end of __init__
170        random_state=42,
171        estimators="all",
172        preprocess=False,
173        n_layers=1,
174        h=None,
175        # MTS attributes (forwarded unchanged to the parent MTS constructor below)
176        obj=None,
177        n_hidden_features=5,
178        activation_name="relu",
179        a=0.01,
180        nodes_sim="sobol",
181        bias=True,
182        dropout=0,
183        direct_link=True,
184        n_clusters=2,
185        cluster_encode=True,
186        type_clust="kmeans",
187        type_scaling=("std", "std", "std"),
188        lags=15,
189        type_pi="scp2-kde",
190        block_size=None,
191        replications=None,
192        kernel=None,
193        agg="mean",
194        seed=123,
195        backend="cpu",
196        show_progress=False,
197    ):
198        self.verbose = verbose  # also passed to the parent constructor below
199        self.ignore_warnings = ignore_warnings
200        self.custom_metric = custom_metric
201        self.predictions = predictions
202        self.sort_by = sort_by  # may still be None here; finalised after super().__init__
203        self.models_ = {}  # filled by fit(): model name -> fitted model
204        self.best_model_ = None  # not assigned in __init__; presumably set after fitting -- TODO confirm
205        self.random_state = random_state
206        self.estimators = estimators
207        self.preprocess = preprocess
208        self.n_layers = n_layers  # kept on this object only; not forwarded to the parent constructor
209        self.h = h  # kept on this object only; not forwarded to the parent constructor
210        super().__init__(  # initialise the MTS parent with the MTS-specific settings
211            obj=obj,
212            n_hidden_features=n_hidden_features,
213            activation_name=activation_name,
214            a=a,
215            nodes_sim=nodes_sim,
216            bias=bias,
217            dropout=dropout,
218            direct_link=direct_link,
219            n_clusters=n_clusters,
220            cluster_encode=cluster_encode,
221            type_clust=type_clust,
222            type_scaling=type_scaling,
223            seed=seed,
224            backend=backend,
225            lags=lags,
226            type_pi=type_pi,
227            block_size=block_size,
228            replications=replications,
229            kernel=kernel,
230            agg=agg,
231            verbose=verbose,
232            show_progress=show_progress,
233        )
234        if self.replications is not None or self.type_pi == "gaussian":  # same condition fit() uses before computing Winkler score / coverage
235            if self.sort_by is None:  # only fill in a default; an explicit sort_by is kept
236                self.sort_by = "WINKLERSCORE"
237        else:
238            if self.sort_by is None:  # only fill in a default; an explicit sort_by is kept
239                self.sort_by = "RMSE"
240
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0: self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0: self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364                continue
365
366            names.append(name)
367            RMSE.append(rmse)
368            MAE.append(mae)
369            MPL.append(mpl)
370
371            if self.custom_metric is not None:
372                try:
373                    if self.h is None:
374                        custom_metric = self.custom_metric(X_test, X_pred)
375                    else:
376                        custom_metric = self.custom_metric(X_test_h, X_pred)
377                    CUSTOM_METRIC.append(custom_metric)
378                except Exception as e:
379                    custom_metric = np.iinfo(np.float32).max
380                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
381
382            if (self.replications is not None) or (self.type_pi == "gaussian"):
383                if per_series == False:
384                    winklerscore = winkler_score(
385                        obj=X_pred, actual=X_test, level=95
386                    )
387                    coveragecalc = coverage(X_pred, X_test, level=95)
388                else:
389                    winklerscore = winkler_score(
390                        obj=X_pred, actual=X_test, level=95, per_series=True
391                    )
392                    coveragecalc = coverage(
393                        X_pred, X_test, level=95, per_series=True
394                    )
395                WINKLERSCORE.append(winklerscore)
396                COVERAGE.append(coveragecalc)
397            TIME.append(time.time() - start)
398
399        if self.estimators == "all":
400            if self.n_layers <= 1:
401                self.regressors = REGRESSORSMTS
402            else:
403                self.regressors = DEEPREGRESSORSMTS
404        else:
405            if self.n_layers <= 1:
406                self.regressors = [
407                    ("MTS(" + est[0] + ")", est[1])
408                    for est in all_estimators()
409                    if (
410                        issubclass(est[1], RegressorMixin)
411                        and (est[0] in self.estimators)
412                    )
413                ]
414            else:  # self.n_layers > 1
415                self.regressors = [
416                    ("DeepMTS(" + est[0] + ")", est[1])
417                    for est in all_estimators()
418                    if (
419                        issubclass(est[1], RegressorMixin)
420                        and (est[0] in self.estimators)
421                    )
422                ]
423
424        if self.preprocess is True:
425            for name, model in tqdm(self.regressors):  # do parallel exec
426                start = time.time()
427                try:
428                    if "random_state" in model().get_params().keys():
429                        pipe = Pipeline(
430                            steps=[
431                                ("preprocessor", preprocessor),
432                                (
433                                    "regressor",
434                                    DeepMTS(
435                                        obj=model(
436                                            random_state=self.random_state,
437                                            **kwargs,
438                                        ),
439                                        n_layers=self.n_layers,
440                                        n_hidden_features=self.n_hidden_features,
441                                        activation_name=self.activation_name,
442                                        a=self.a,
443                                        nodes_sim=self.nodes_sim,
444                                        bias=self.bias,
445                                        dropout=self.dropout,
446                                        direct_link=self.direct_link,
447                                        n_clusters=self.n_clusters,
448                                        cluster_encode=self.cluster_encode,
449                                        type_clust=self.type_clust,
450                                        type_scaling=self.type_scaling,
451                                        lags=self.lags,
452                                        type_pi=self.type_pi,
453                                        block_size=self.block_size,
454                                        replications=self.replications,
455                                        kernel=self.kernel,
456                                        agg=self.agg,
457                                        seed=self.seed,
458                                        backend=self.backend,
459                                        show_progress=self.show_progress,
460                                    ),
461                                ),
462                            ]
463                        )
464                    else:  # "random_state" in model().get_params().keys()
465                        pipe = Pipeline(
466                            steps=[
467                                ("preprocessor", preprocessor),
468                                (
469                                    "regressor",
470                                    DeepMTS(
471                                        obj=model(**kwargs),
472                                        n_layers=self.n_layers,
473                                        n_hidden_features=self.n_hidden_features,
474                                        activation_name=self.activation_name,
475                                        a=self.a,
476                                        nodes_sim=self.nodes_sim,
477                                        bias=self.bias,
478                                        dropout=self.dropout,
479                                        direct_link=self.direct_link,
480                                        n_clusters=self.n_clusters,
481                                        cluster_encode=self.cluster_encode,
482                                        type_clust=self.type_clust,
483                                        type_scaling=self.type_scaling,
484                                        lags=self.lags,
485                                        type_pi=self.type_pi,
486                                        block_size=self.block_size,
487                                        replications=self.replications,
488                                        kernel=self.kernel,
489                                        agg=self.agg,
490                                        seed=self.seed,
491                                        backend=self.backend,
492                                        show_progress=self.show_progress,
493                                    ),
494                                ),
495                            ]
496                        )
497
498                    pipe.fit(X_train, **kwargs)
499                    # pipe.fit(X_train, xreg=xreg)
500
501                    self.models_[name] = pipe
502
503                    if self.h is None:
504                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
505                    else:
506                        assert self.h > 0, "h must be > 0"
507                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
508
509                    if (self.replications is not None) or (
510                        self.type_pi == "gaussian"
511                    ):
512                        rmse = mean_errors(
513                            actual=X_test,
514                            pred=X_pred,
515                            scoring="root_mean_squared_error",
516                            per_series=per_series,
517                        )
518                        mae = mean_errors(
519                            actual=X_test,
520                            pred=X_pred,
521                            scoring="mean_absolute_error",
522                            per_series=per_series,
523                        )
524                        mpl = mean_errors(
525                            actual=X_test,
526                            pred=X_pred,
527                            scoring="mean_pinball_loss",
528                            per_series=per_series,
529                        )
530                        winklerscore = winkler_score(
531                            obj=X_pred,
532                            actual=X_test,
533                            level=95,
534                            per_series=per_series,
535                        )
536                        coveragecalc = coverage(
537                            X_pred, X_test, level=95, per_series=per_series
538                        )
539                    else:
540                        rmse = mean_errors(
541                            actual=X_test,
542                            pred=X_pred,
543                            scoring="root_mean_squared_error",
544                            per_series=per_series,
545                        )
546                        mae = mean_errors(
547                            actual=X_test,
548                            pred=X_pred,
549                            scoring="mean_absolute_error",
550                            per_series=per_series,
551                        )
552                        mpl = mean_errors(
553                            actual=X_test,
554                            pred=X_pred,
555                            scoring="mean_pinball_loss",
556                            per_series=per_series,
557                        )
558
559                    names.append(name)
560                    RMSE.append(rmse)
561                    MAE.append(mae)
562                    MPL.append(mpl)
563
564                    if (self.replications is not None) or (
565                        self.type_pi == "gaussian"
566                    ):
567                        WINKLERSCORE.append(winklerscore)
568                        COVERAGE.append(coveragecalc)
569                    TIME.append(time.time() - start)
570
571                    if self.custom_metric is not None:
572                        try:
573                            custom_metric = self.custom_metric(X_test, X_pred)
574                            CUSTOM_METRIC.append(custom_metric)
575                        except Exception as e:
576                            custom_metric = np.iinfo(np.float32).max
577                            CUSTOM_METRIC.append(custom_metric)
578
579                    if self.verbose > 0:
580                        if (self.replications is not None) or (
581                            self.type_pi == "gaussian"
582                        ):
583                            scores_verbose = {
584                                "Model": name,
585                                "RMSE": rmse,
586                                "MAE": mae,
587                                "MPL": mpl,
588                                "WINKLERSCORE": winklerscore,
589                                "COVERAGE": coveragecalc,
590                                "Time taken": time.time() - start,
591                            }
592                        else:
593                            scores_verbose = {
594                                "Model": name,
595                                "RMSE": rmse,
596                                "MAE": mae,
597                                "MPL": mpl,
598                                "Time taken": time.time() - start,
599                            }
600
601                        if self.custom_metric is not None:
602                            scores_verbose["Custom metric"] = custom_metric
603
604                    if self.predictions:
605                        predictions[name] = X_pred
606                except Exception as exception:
607                    if self.ignore_warnings is False:
608                        print(name + " model failed to execute")
609                        print(exception)
610
611        else:  # no preprocessing
612            for name, model in tqdm(self.regressors):  # do parallel exec
613                start = time.time()
614                try:
615                    if "random_state" in model().get_params().keys():
616                        pipe = DeepMTS(
617                            obj=model(random_state=self.random_state, **kwargs),
618                            n_layers=self.n_layers,
619                            n_hidden_features=self.n_hidden_features,
620                            activation_name=self.activation_name,
621                            a=self.a,
622                            nodes_sim=self.nodes_sim,
623                            bias=self.bias,
624                            dropout=self.dropout,
625                            direct_link=self.direct_link,
626                            n_clusters=self.n_clusters,
627                            cluster_encode=self.cluster_encode,
628                            type_clust=self.type_clust,
629                            type_scaling=self.type_scaling,
630                            lags=self.lags,
631                            type_pi=self.type_pi,
632                            block_size=self.block_size,
633                            replications=self.replications,
634                            kernel=self.kernel,
635                            agg=self.agg,
636                            seed=self.seed,
637                            backend=self.backend,
638                            show_progress=self.show_progress,
639                        )
640                    else:
641                        pipe = DeepMTS(
642                            obj=model(**kwargs),
643                            n_layers=self.n_layers,
644                            n_hidden_features=self.n_hidden_features,
645                            activation_name=self.activation_name,
646                            a=self.a,
647                            nodes_sim=self.nodes_sim,
648                            bias=self.bias,
649                            dropout=self.dropout,
650                            direct_link=self.direct_link,
651                            n_clusters=self.n_clusters,
652                            cluster_encode=self.cluster_encode,
653                            type_clust=self.type_clust,
654                            type_scaling=self.type_scaling,
655                            lags=self.lags,
656                            type_pi=self.type_pi,
657                            block_size=self.block_size,
658                            replications=self.replications,
659                            kernel=self.kernel,
660                            agg=self.agg,
661                            seed=self.seed,
662                            backend=self.backend,
663                            show_progress=self.show_progress,
664                        )
665
666                    pipe.fit(X_train, xreg, **kwargs)
667                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
668
669                    self.models_[name] = pipe
670
671                    if self.preprocess is True:
672                        if self.h is None:
673                            X_pred = pipe["regressor"].predict(
674                                h=X_test.shape[0], **kwargs
675                            )
676                        else:
677                            assert (
678                                self.h > 0 and self.h <= X_test.shape[0]
679                            ), "h must be > 0 and < X_test.shape[0]"
680                            X_pred = pipe["regressor"].predict(
681                                h=self.h, **kwargs
682                            )
683
684                    else:
685                        if self.h is None:
686                            X_pred = pipe.predict(
687                                h=X_test.shape[0],
688                                **kwargs,
689                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
690                            )
691                        else:
692                            assert (
693                                self.h > 0 and self.h <= X_test.shape[0]
694                            ), "h must be > 0 and < X_test.shape[0]"
695                            X_pred = pipe.predict(h=self.h, **kwargs)
696
697                    if self.h is None:
698                        if (self.replications is not None) or (
699                            self.type_pi == "gaussian"
700                        ):
701                            rmse = mean_errors(
702                                actual=X_test,
703                                pred=X_pred.mean,
704                                scoring="root_mean_squared_error",
705                                per_series=per_series,
706                            )
707                            mae = mean_errors(
708                                actual=X_test,
709                                pred=X_pred.mean,
710                                scoring="mean_absolute_error",
711                                per_series=per_series,
712                            )
713                            mpl = mean_errors(
714                                actual=X_test,
715                                pred=X_pred.mean,
716                                scoring="mean_pinball_loss",
717                                per_series=per_series,
718                            )
719                            winklerscore = winkler_score(
720                                obj=X_pred,
721                                actual=X_test,
722                                level=95,
723                                per_series=per_series,
724                            )
725                            coveragecalc = coverage(
726                                X_pred, X_test, level=95, per_series=per_series
727                            )
728                        else:  # no prediction interval
729                            rmse = mean_errors(
730                                actual=X_test,
731                                pred=X_pred,
732                                scoring="root_mean_squared_error",
733                                per_series=per_series,
734                            )
735                            mae = mean_errors(
736                                actual=X_test,
737                                pred=X_pred,
738                                scoring="mean_absolute_error",
739                                per_series=per_series,
740                            )
741                            mpl = mean_errors(
742                                actual=X_test,
743                                pred=X_pred,
744                                scoring="mean_pinball_loss",
745                                per_series=per_series,
746                            )
747                    else:  # self.h is not None
748                        if (self.replications is not None) or (
749                            self.type_pi == "gaussian"
750                        ):
751                            if isinstance(X_test, pd.DataFrame):
752                                X_test_h = X_test.iloc[0: self.h, :]
753                                rmse = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="root_mean_squared_error",
757                                    per_series=per_series,
758                                )
759                                mae = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_absolute_error",
763                                    per_series=per_series,
764                                )
765                                mpl = mean_errors(
766                                    actual=X_test_h,
767                                    pred=X_pred,
768                                    scoring="mean_pinball_loss",
769                                    per_series=per_series,
770                                )
771                                winklerscore = winkler_score(
772                                    obj=X_pred,
773                                    actual=X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                                coveragecalc = coverage(
778                                    X_pred,
779                                    X_test_h,
780                                    level=95,
781                                    per_series=per_series,
782                                )
783                            else:
784                                X_test_h = X_test[0: self.h, :]
785                                rmse = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="root_mean_squared_error",
789                                    per_series=per_series,
790                                )
791                                mae = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_absolute_error",
795                                    per_series=per_series,
796                                )
797                                mpl = mean_errors(
798                                    actual=X_test_h,
799                                    pred=X_pred,
800                                    scoring="mean_pinball_loss",
801                                    per_series=per_series,
802                                )
803                                winklerscore = winkler_score(
804                                    obj=X_pred,
805                                    actual=X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                                coveragecalc = coverage(
810                                    X_pred,
811                                    X_test_h,
812                                    level=95,
813                                    per_series=per_series,
814                                )
815                        else:  # no prediction interval
816                            if isinstance(X_test, pd.DataFrame):
817                                X_test_h = X_test.iloc[0: self.h, :]
818                                rmse = mean_errors(
819                                    actual=X_test_h,
820                                    pred=X_pred,
821                                    scoring="root_mean_squared_error",
822                                    per_series=per_series,
823                                )
824                                mae = mean_errors(
825                                    actual=X_test_h,
826                                    pred=X_pred,
827                                    scoring="mean_absolute_error",
828                                    per_series=per_series,
829                                )
830                                mpl = mean_errors(
831                                    actual=X_test_h,
832                                    pred=X_pred,
833                                    scoring="mean_pinball_loss",
834                                    per_series=per_series,
835                                )
836                            else:
837                                X_test_h = X_test[0: self.h, :]
838                                rmse = mean_errors(
839                                    actual=X_test_h,
840                                    pred=X_pred,
841                                    scoring="root_mean_squared_error",
842                                    per_series=per_series,
843                                )
844                                mae = mean_errors(
845                                    actual=X_test_h,
846                                    pred=X_pred,
847                                    scoring="mean_absolute_error",
848                                    per_series=per_series,
849                                )
850
851                    names.append(name)
852                    RMSE.append(rmse)
853                    MAE.append(mae)
854                    MPL.append(mpl)
855                    if (self.replications is not None) or (
856                        self.type_pi == "gaussian"
857                    ):
858                        WINKLERSCORE.append(winklerscore)
859                        COVERAGE.append(coveragecalc)
860                    TIME.append(time.time() - start)
861
862                    if self.custom_metric is not None:
863                        try:
864                            if self.h is None:
865                                custom_metric = self.custom_metric(
866                                    X_test, X_pred
867                                )
868                            else:
869                                custom_metric = self.custom_metric(
870                                    X_test_h, X_pred
871                                )
872                            CUSTOM_METRIC.append(custom_metric)
873                        except Exception as e:
874                            custom_metric = np.iinfo(np.float32).max
875                            CUSTOM_METRIC.append(np.iinfo(np.float32).max)
876
877                    if self.verbose > 0:
878                        if (self.replications is not None) or (
879                            self.type_pi == "gaussian"
880                        ):
881                            scores_verbose = {
882                                "Model": name,
883                                "RMSE": rmse,
884                                "MAE": mae,
885                                "MPL": mpl,
886                                "WINKLERSCORE": winklerscore,
887                                "COVERAGE": coveragecalc,
888                                "Time taken": time.time() - start,
889                            }
890                        else:
891                            scores_verbose = {
892                                "Model": name,
893                                "RMSE": rmse,
894                                "MAE": mae,
895                                "MPL": mpl,
896                                "Time taken": time.time() - start,
897                            }
898
899                        if self.custom_metric is not None:
900                            scores_verbose["Custom metric"] = custom_metric
901
902                    if self.predictions:
903                        predictions[name] = X_pred
904
905                except Exception as exception:
906                    if self.ignore_warnings is False:
907                        print(name + " model failed to execute")
908                        print(exception)
909
910        if (self.replications is not None) or (self.type_pi == "gaussian"):
911            scores = {
912                "Model": names,
913                "RMSE": RMSE,
914                "MAE": MAE,
915                "MPL": MPL,
916                "WINKLERSCORE": WINKLERSCORE,
917                "COVERAGE": COVERAGE,
918                "Time Taken": TIME,
919            }
920        else:
921            scores = {
922                "Model": names,
923                "RMSE": RMSE,
924                "MAE": MAE,
925                "MPL": MPL,
926                "Time Taken": TIME,
927            }
928
929        if self.custom_metric is not None:
930            scores["Custom metric"] = CUSTOM_METRIC
931
932        if per_series:
933            scores = dict_to_dataframe_series(scores, self.series_names)
934        else:
935            scores = pd.DataFrame(scores)
936
937        try:  # case per_series, can't be sorted
938            scores = scores.sort_values(
939                by=self.sort_by, ascending=True
940            ).set_index("Model")
941
942            self.best_model_ = self.models_[scores.index[0]]
943        except Exception as e:
944            pass
945
946        if self.predictions is True:
947            return scores, predictions
948
949        return scores
950
951    def get_best_model(self):
952        """
953        This function returns the best model pipeline based on the sort_by metric.
954
955        Returns:
956
957            best_model: object,
958                Returns the best model pipeline based on the sort_by metric.
959
960        """
961        return self.best_model_
962
963    def provide_models(self, X_train, X_test):
964        """
965        This function returns all the model objects trained in fit function.
966        If fit is not called already, then we call fit and then return the models.
967
968        Parameters:
969
970            X_train : array-like,
971                Training vectors, where rows is the number of samples
972                and columns is the number of features.
973
974            X_test : array-like,
975                Testing vectors, where rows is the number of samples
976                and columns is the number of features.
977
978        Returns:
979
980            models: dict-object,
981                Returns a dictionary with each model pipeline as value
982                with key as name of models.
983
984        """
985        if self.h is None:
986            if len(self.models_.keys()) == 0:
987                self.fit(X_train, X_test)
988        else:
989            if len(self.models_.keys()) == 0:
990                if isinstance(X_test, pd.DataFrame):
991                    self.fit(X_train, X_test.iloc[0: self.h, :])
992                else:
993                    self.fit(X_train, X_test[0: self.h, :])
994
995        return self.models_

Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.

Parameters:

verbose: int, optional (default=0)
    Any positive number for verbosity.

ignore_warnings: bool, optional (default=True)
    When set to True, the warnings related to algorithms that are not
    able to run are ignored.

custom_metric: function, optional (default=None)
    When function is provided, models are evaluated based on the custom
      evaluation metric provided.

predictions: bool, optional (default=False)
    When set to True, the predictions of all the models are returned, along with the scores, as a dictionary keyed by model name.

sort_by: string, optional (default='RMSE')
    Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
    'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
    provided by custom_metric.

random_state: int, optional (default=42)
    Reproducibility seed.

estimators: list, optional (default='all')
    list of Estimators (regression algorithms) names or just 'all' (default='all')

preprocess: bool, preprocessing is done when set to True

n_layers: int, optional (default=1)
    Number of layers in the network. When set to 1, the model is equivalent to a MTS.

h: int, optional (default=None)
    Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).

All the other parameters are the same as MTS's.

Attributes:

models_: dict-object
    Returns a dictionary with each model pipeline as value
    with key as name of models.

best_model_: object
    Returns the best model pipeline based on the sort_by metric.

Examples:

See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
241    def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
242        """Fit Regression algorithms to X_train, predict and score on X_test.
243
244        Parameters:
245
246            X_train: array-like or data frame,
247                Training vectors, where rows is the number of samples
248                and columns is the number of features.
249
250            X_test: array-like or data frame,
251                Testing vectors, where rows is the number of samples
252                and columns is the number of features.
253
254            xreg: array-like, optional (default=None)
255                Additional (external) regressors to be passed to self.obj
256                xreg must be in 'increasing' order (most recent observations last)
257
258            per_series: bool, optional (default=False)
259                When set to True, the metrics are computed series by series.
260
261            **kwargs: dict, optional (default=None)
262                Additional parameters to be passed to `fit` method of `obj`.
263
264        Returns:
265
266            scores: Pandas DataFrame
267                Returns metrics of all the models in a Pandas DataFrame.
268
269            predictions: Pandas DataFrame
270                Returns predictions of all the models in a Pandas DataFrame.
271
272        """
273        R2 = []
274        ADJR2 = []
275        ME = []
276        MPL = []
277        RMSE = []
278        MAE = []
279        MPE = []
280        MAPE = []
281        WINKLERSCORE = []
282        COVERAGE = []
283
284        # WIN = []
285        names = []
286        TIME = []
287        predictions = {}
288
289        if self.custom_metric is not None:
290            CUSTOM_METRIC = []
291
292        if self.h is None:
293            assert X_test is not None, "If h is None, X_test must be provided."
294
295        if isinstance(X_train, np.ndarray):
296            X_train = pd.DataFrame(X_train)
297            X_test = pd.DataFrame(X_test)
298
299        self.series_names = X_train.columns.tolist()
300
301        X_train = convert_df_to_numeric(X_train)
302        X_test = convert_df_to_numeric(X_test)
303
304        numeric_features = X_train.select_dtypes(include=[np.number]).columns
305        categorical_features = X_train.select_dtypes(include=["object"]).columns
306
307        categorical_low, categorical_high = get_card_split(
308            X_train, categorical_features
309        )
310
311        if self.preprocess:
312            preprocessor = ColumnTransformer(
313                transformers=[
314                    ("numeric", numeric_transformer, numeric_features),
315                    (
316                        "categorical_low",
317                        categorical_transformer_low,
318                        categorical_low,
319                    ),
320                    (
321                        "categorical_high",
322                        categorical_transformer_high,
323                        categorical_high,
324                    ),
325                ]
326            )
327
328        # baselines (Classical MTS) ----
329        for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]):
330            try:
331                start = time.time()
332                regr = ClassicalMTS(model=name)
333                regr.fit(X_train, **kwargs)
334                self.models_[name] = regr
335                if self.h is None:
336                    X_pred = regr.predict(h=X_test.shape[0], **kwargs)
337                else:
338                    assert self.h > 0, "h must be > 0"
339                    X_pred = regr.predict(h=self.h, **kwargs)
340                    try:
341                        X_test = X_test[0: self.h, :]
342                    except Exception as e:
343                        X_test = X_test.iloc[0: self.h, :]
344
345                rmse = mean_errors(
346                    actual=X_test,
347                    pred=X_pred,
348                    scoring="root_mean_squared_error",
349                    per_series=per_series,
350                )
351                mae = mean_errors(
352                    actual=X_test,
353                    pred=X_pred,
354                    scoring="mean_absolute_error",
355                    per_series=per_series,
356                )
357                mpl = mean_errors(
358                    actual=X_test,
359                    pred=X_pred,
360                    scoring="mean_pinball_loss",
361                    per_series=per_series,
362                )
363            except Exception:
364                continue
365
366            names.append(name)
367            RMSE.append(rmse)
368            MAE.append(mae)
369            MPL.append(mpl)
370
371            if self.custom_metric is not None:
372                try:
373                    if self.h is None:
374                        custom_metric = self.custom_metric(X_test, X_pred)
375                    else:
376                        custom_metric = self.custom_metric(X_test_h, X_pred)
377                    CUSTOM_METRIC.append(custom_metric)
378                except Exception as e:
379                    custom_metric = np.iinfo(np.float32).max
380                    CUSTOM_METRIC.append(np.iinfo(np.float32).max)
381
382            if (self.replications is not None) or (self.type_pi == "gaussian"):
383                if per_series == False:
384                    winklerscore = winkler_score(
385                        obj=X_pred, actual=X_test, level=95
386                    )
387                    coveragecalc = coverage(X_pred, X_test, level=95)
388                else:
389                    winklerscore = winkler_score(
390                        obj=X_pred, actual=X_test, level=95, per_series=True
391                    )
392                    coveragecalc = coverage(
393                        X_pred, X_test, level=95, per_series=True
394                    )
395                WINKLERSCORE.append(winklerscore)
396                COVERAGE.append(coveragecalc)
397            TIME.append(time.time() - start)
398
399        if self.estimators == "all":
400            if self.n_layers <= 1:
401                self.regressors = REGRESSORSMTS
402            else:
403                self.regressors = DEEPREGRESSORSMTS
404        else:
405            if self.n_layers <= 1:
406                self.regressors = [
407                    ("MTS(" + est[0] + ")", est[1])
408                    for est in all_estimators()
409                    if (
410                        issubclass(est[1], RegressorMixin)
411                        and (est[0] in self.estimators)
412                    )
413                ]
414            else:  # self.n_layers > 1
415                self.regressors = [
416                    ("DeepMTS(" + est[0] + ")", est[1])
417                    for est in all_estimators()
418                    if (
419                        issubclass(est[1], RegressorMixin)
420                        and (est[0] in self.estimators)
421                    )
422                ]
423
424        if self.preprocess is True:
425            for name, model in tqdm(self.regressors):  # do parallel exec
426                start = time.time()
427                try:
428                    if "random_state" in model().get_params().keys():
429                        pipe = Pipeline(
430                            steps=[
431                                ("preprocessor", preprocessor),
432                                (
433                                    "regressor",
434                                    DeepMTS(
435                                        obj=model(
436                                            random_state=self.random_state,
437                                            **kwargs,
438                                        ),
439                                        n_layers=self.n_layers,
440                                        n_hidden_features=self.n_hidden_features,
441                                        activation_name=self.activation_name,
442                                        a=self.a,
443                                        nodes_sim=self.nodes_sim,
444                                        bias=self.bias,
445                                        dropout=self.dropout,
446                                        direct_link=self.direct_link,
447                                        n_clusters=self.n_clusters,
448                                        cluster_encode=self.cluster_encode,
449                                        type_clust=self.type_clust,
450                                        type_scaling=self.type_scaling,
451                                        lags=self.lags,
452                                        type_pi=self.type_pi,
453                                        block_size=self.block_size,
454                                        replications=self.replications,
455                                        kernel=self.kernel,
456                                        agg=self.agg,
457                                        seed=self.seed,
458                                        backend=self.backend,
459                                        show_progress=self.show_progress,
460                                    ),
461                                ),
462                            ]
463                        )
464                    else:  # "random_state" in model().get_params().keys()
465                        pipe = Pipeline(
466                            steps=[
467                                ("preprocessor", preprocessor),
468                                (
469                                    "regressor",
470                                    DeepMTS(
471                                        obj=model(**kwargs),
472                                        n_layers=self.n_layers,
473                                        n_hidden_features=self.n_hidden_features,
474                                        activation_name=self.activation_name,
475                                        a=self.a,
476                                        nodes_sim=self.nodes_sim,
477                                        bias=self.bias,
478                                        dropout=self.dropout,
479                                        direct_link=self.direct_link,
480                                        n_clusters=self.n_clusters,
481                                        cluster_encode=self.cluster_encode,
482                                        type_clust=self.type_clust,
483                                        type_scaling=self.type_scaling,
484                                        lags=self.lags,
485                                        type_pi=self.type_pi,
486                                        block_size=self.block_size,
487                                        replications=self.replications,
488                                        kernel=self.kernel,
489                                        agg=self.agg,
490                                        seed=self.seed,
491                                        backend=self.backend,
492                                        show_progress=self.show_progress,
493                                    ),
494                                ),
495                            ]
496                        )
497
498                    pipe.fit(X_train, **kwargs)
499                    # pipe.fit(X_train, xreg=xreg)
500
501                    self.models_[name] = pipe
502
503                    if self.h is None:
504                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
505                    else:
506                        assert self.h > 0, "h must be > 0"
507                        X_pred = pipe["regressor"].predict(h=self.h, **kwargs)
508
509                    if (self.replications is not None) or (
510                        self.type_pi == "gaussian"
511                    ):
512                        rmse = mean_errors(
513                            actual=X_test,
514                            pred=X_pred,
515                            scoring="root_mean_squared_error",
516                            per_series=per_series,
517                        )
518                        mae = mean_errors(
519                            actual=X_test,
520                            pred=X_pred,
521                            scoring="mean_absolute_error",
522                            per_series=per_series,
523                        )
524                        mpl = mean_errors(
525                            actual=X_test,
526                            pred=X_pred,
527                            scoring="mean_pinball_loss",
528                            per_series=per_series,
529                        )
530                        winklerscore = winkler_score(
531                            obj=X_pred,
532                            actual=X_test,
533                            level=95,
534                            per_series=per_series,
535                        )
536                        coveragecalc = coverage(
537                            X_pred, X_test, level=95, per_series=per_series
538                        )
539                    else:
540                        rmse = mean_errors(
541                            actual=X_test,
542                            pred=X_pred,
543                            scoring="root_mean_squared_error",
544                            per_series=per_series,
545                        )
546                        mae = mean_errors(
547                            actual=X_test,
548                            pred=X_pred,
549                            scoring="mean_absolute_error",
550                            per_series=per_series,
551                        )
552                        mpl = mean_errors(
553                            actual=X_test,
554                            pred=X_pred,
555                            scoring="mean_pinball_loss",
556                            per_series=per_series,
557                        )
558
559                    names.append(name)
560                    RMSE.append(rmse)
561                    MAE.append(mae)
562                    MPL.append(mpl)
563
564                    if (self.replications is not None) or (
565                        self.type_pi == "gaussian"
566                    ):
567                        WINKLERSCORE.append(winklerscore)
568                        COVERAGE.append(coveragecalc)
569                    TIME.append(time.time() - start)
570
571                    if self.custom_metric is not None:
572                        try:
573                            custom_metric = self.custom_metric(X_test, X_pred)
574                            CUSTOM_METRIC.append(custom_metric)
575                        except Exception as e:
576                            custom_metric = np.iinfo(np.float32).max
577                            CUSTOM_METRIC.append(custom_metric)
578
579                    if self.verbose > 0:
580                        if (self.replications is not None) or (
581                            self.type_pi == "gaussian"
582                        ):
583                            scores_verbose = {
584                                "Model": name,
585                                "RMSE": rmse,
586                                "MAE": mae,
587                                "MPL": mpl,
588                                "WINKLERSCORE": winklerscore,
589                                "COVERAGE": coveragecalc,
590                                "Time taken": time.time() - start,
591                            }
592                        else:
593                            scores_verbose = {
594                                "Model": name,
595                                "RMSE": rmse,
596                                "MAE": mae,
597                                "MPL": mpl,
598                                "Time taken": time.time() - start,
599                            }
600
601                        if self.custom_metric is not None:
602                            scores_verbose["Custom metric"] = custom_metric
603
604                    if self.predictions:
605                        predictions[name] = X_pred
606                except Exception as exception:
607                    if self.ignore_warnings is False:
608                        print(name + " model failed to execute")
609                        print(exception)
610
611        else:  # no preprocessing
612            for name, model in tqdm(self.regressors):  # do parallel exec
613                start = time.time()
614                try:
615                    if "random_state" in model().get_params().keys():
616                        pipe = DeepMTS(
617                            obj=model(random_state=self.random_state, **kwargs),
618                            n_layers=self.n_layers,
619                            n_hidden_features=self.n_hidden_features,
620                            activation_name=self.activation_name,
621                            a=self.a,
622                            nodes_sim=self.nodes_sim,
623                            bias=self.bias,
624                            dropout=self.dropout,
625                            direct_link=self.direct_link,
626                            n_clusters=self.n_clusters,
627                            cluster_encode=self.cluster_encode,
628                            type_clust=self.type_clust,
629                            type_scaling=self.type_scaling,
630                            lags=self.lags,
631                            type_pi=self.type_pi,
632                            block_size=self.block_size,
633                            replications=self.replications,
634                            kernel=self.kernel,
635                            agg=self.agg,
636                            seed=self.seed,
637                            backend=self.backend,
638                            show_progress=self.show_progress,
639                        )
640                    else:
641                        pipe = DeepMTS(
642                            obj=model(**kwargs),
643                            n_layers=self.n_layers,
644                            n_hidden_features=self.n_hidden_features,
645                            activation_name=self.activation_name,
646                            a=self.a,
647                            nodes_sim=self.nodes_sim,
648                            bias=self.bias,
649                            dropout=self.dropout,
650                            direct_link=self.direct_link,
651                            n_clusters=self.n_clusters,
652                            cluster_encode=self.cluster_encode,
653                            type_clust=self.type_clust,
654                            type_scaling=self.type_scaling,
655                            lags=self.lags,
656                            type_pi=self.type_pi,
657                            block_size=self.block_size,
658                            replications=self.replications,
659                            kernel=self.kernel,
660                            agg=self.agg,
661                            seed=self.seed,
662                            backend=self.backend,
663                            show_progress=self.show_progress,
664                        )
665
666                    pipe.fit(X_train, xreg, **kwargs)
667                    # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
668
669                    self.models_[name] = pipe
670
671                    if self.preprocess is True:
672                        if self.h is None:
673                            X_pred = pipe["regressor"].predict(
674                                h=X_test.shape[0], **kwargs
675                            )
676                        else:
677                            assert (
678                                self.h > 0 and self.h <= X_test.shape[0]
679                            ), "h must be > 0 and < X_test.shape[0]"
680                            X_pred = pipe["regressor"].predict(
681                                h=self.h, **kwargs
682                            )
683
684                    else:
685                        if self.h is None:
686                            X_pred = pipe.predict(
687                                h=X_test.shape[0],
688                                **kwargs,
689                                # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
690                            )
691                        else:
692                            assert (
693                                self.h > 0 and self.h <= X_test.shape[0]
694                            ), "h must be > 0 and < X_test.shape[0]"
695                            X_pred = pipe.predict(h=self.h, **kwargs)
696
697                    if self.h is None:
698                        if (self.replications is not None) or (
699                            self.type_pi == "gaussian"
700                        ):
701                            rmse = mean_errors(
702                                actual=X_test,
703                                pred=X_pred.mean,
704                                scoring="root_mean_squared_error",
705                                per_series=per_series,
706                            )
707                            mae = mean_errors(
708                                actual=X_test,
709                                pred=X_pred.mean,
710                                scoring="mean_absolute_error",
711                                per_series=per_series,
712                            )
713                            mpl = mean_errors(
714                                actual=X_test,
715                                pred=X_pred.mean,
716                                scoring="mean_pinball_loss",
717                                per_series=per_series,
718                            )
719                            winklerscore = winkler_score(
720                                obj=X_pred,
721                                actual=X_test,
722                                level=95,
723                                per_series=per_series,
724                            )
725                            coveragecalc = coverage(
726                                X_pred, X_test, level=95, per_series=per_series
727                            )
728                        else:  # no prediction interval
729                            rmse = mean_errors(
730                                actual=X_test,
731                                pred=X_pred,
732                                scoring="root_mean_squared_error",
733                                per_series=per_series,
734                            )
735                            mae = mean_errors(
736                                actual=X_test,
737                                pred=X_pred,
738                                scoring="mean_absolute_error",
739                                per_series=per_series,
740                            )
741                            mpl = mean_errors(
742                                actual=X_test,
743                                pred=X_pred,
744                                scoring="mean_pinball_loss",
745                                per_series=per_series,
746                            )
747                    else:  # self.h is not None
748                        if (self.replications is not None) or (
749                            self.type_pi == "gaussian"
750                        ):
751                            if isinstance(X_test, pd.DataFrame):
752                                X_test_h = X_test.iloc[0: self.h, :]
753                                rmse = mean_errors(
754                                    actual=X_test_h,
755                                    pred=X_pred,
756                                    scoring="root_mean_squared_error",
757                                    per_series=per_series,
758                                )
759                                mae = mean_errors(
760                                    actual=X_test_h,
761                                    pred=X_pred,
762                                    scoring="mean_absolute_error",
763                                    per_series=per_series,
764                                )
765                                mpl = mean_errors(
766                                    actual=X_test_h,
767                                    pred=X_pred,
768                                    scoring="mean_pinball_loss",
769                                    per_series=per_series,
770                                )
771                                winklerscore = winkler_score(
772                                    obj=X_pred,
773                                    actual=X_test_h,
774                                    level=95,
775                                    per_series=per_series,
776                                )
777                                coveragecalc = coverage(
778                                    X_pred,
779                                    X_test_h,
780                                    level=95,
781                                    per_series=per_series,
782                                )
783                            else:
784                                X_test_h = X_test[0: self.h, :]
785                                rmse = mean_errors(
786                                    actual=X_test_h,
787                                    pred=X_pred,
788                                    scoring="root_mean_squared_error",
789                                    per_series=per_series,
790                                )
791                                mae = mean_errors(
792                                    actual=X_test_h,
793                                    pred=X_pred,
794                                    scoring="mean_absolute_error",
795                                    per_series=per_series,
796                                )
797                                mpl = mean_errors(
798                                    actual=X_test_h,
799                                    pred=X_pred,
800                                    scoring="mean_pinball_loss",
801                                    per_series=per_series,
802                                )
803                                winklerscore = winkler_score(
804                                    obj=X_pred,
805                                    actual=X_test_h,
806                                    level=95,
807                                    per_series=per_series,
808                                )
809                                coveragecalc = coverage(
810                                    X_pred,
811                                    X_test_h,
812                                    level=95,
813                                    per_series=per_series,
814                                )
815                        else:  # no prediction interval
816                            if isinstance(X_test, pd.DataFrame):
817                                X_test_h = X_test.iloc[0: self.h, :]
818                                rmse = mean_errors(
819                                    actual=X_test_h,
820                                    pred=X_pred,
821                                    scoring="root_mean_squared_error",
822                                    per_series=per_series,
823                                )
824                                mae = mean_errors(
825                                    actual=X_test_h,
826                                    pred=X_pred,
827                                    scoring="mean_absolute_error",
828                                    per_series=per_series,
829                                )
830                                mpl = mean_errors(
831                                    actual=X_test_h,
832                                    pred=X_pred,
833                                    scoring="mean_pinball_loss",
834                                    per_series=per_series,
835                                )
836                            else:
837                                X_test_h = X_test[0: self.h, :]
838                                rmse = mean_errors(
839                                    actual=X_test_h,
840                                    pred=X_pred,
841                                    scoring="root_mean_squared_error",
842                                    per_series=per_series,
843                                )
844                                mae = mean_errors(
845                                    actual=X_test_h,
846                                    pred=X_pred,
847                                    scoring="mean_absolute_error",
848                                    per_series=per_series,
849                                )
850
851                    names.append(name)
852                    RMSE.append(rmse)
853                    MAE.append(mae)
854                    MPL.append(mpl)
855                    if (self.replications is not None) or (
856                        self.type_pi == "gaussian"
857                    ):
858                        WINKLERSCORE.append(winklerscore)
859                        COVERAGE.append(coveragecalc)
860                    TIME.append(time.time() - start)
861
862                    if self.custom_metric is not None:
863                        try:
864                            if self.h is None:
865                                custom_metric = self.custom_metric(
866                                    X_test, X_pred
867                                )
868                            else:
869                                custom_metric = self.custom_metric(
870                                    X_test_h, X_pred
871                                )
872                            CUSTOM_METRIC.append(custom_metric)
873                        except Exception as e:
874                            custom_metric = np.iinfo(np.float32).max
875                            CUSTOM_METRIC.append(np.iinfo(np.float32).max)
876
877                    if self.verbose > 0:
878                        if (self.replications is not None) or (
879                            self.type_pi == "gaussian"
880                        ):
881                            scores_verbose = {
882                                "Model": name,
883                                "RMSE": rmse,
884                                "MAE": mae,
885                                "MPL": mpl,
886                                "WINKLERSCORE": winklerscore,
887                                "COVERAGE": coveragecalc,
888                                "Time taken": time.time() - start,
889                            }
890                        else:
891                            scores_verbose = {
892                                "Model": name,
893                                "RMSE": rmse,
894                                "MAE": mae,
895                                "MPL": mpl,
896                                "Time taken": time.time() - start,
897                            }
898
899                        if self.custom_metric is not None:
900                            scores_verbose["Custom metric"] = custom_metric
901
902                    if self.predictions:
903                        predictions[name] = X_pred
904
905                except Exception as exception:
906                    if self.ignore_warnings is False:
907                        print(name + " model failed to execute")
908                        print(exception)
909
910        if (self.replications is not None) or (self.type_pi == "gaussian"):
911            scores = {
912                "Model": names,
913                "RMSE": RMSE,
914                "MAE": MAE,
915                "MPL": MPL,
916                "WINKLERSCORE": WINKLERSCORE,
917                "COVERAGE": COVERAGE,
918                "Time Taken": TIME,
919            }
920        else:
921            scores = {
922                "Model": names,
923                "RMSE": RMSE,
924                "MAE": MAE,
925                "MPL": MPL,
926                "Time Taken": TIME,
927            }
928
929        if self.custom_metric is not None:
930            scores["Custom metric"] = CUSTOM_METRIC
931
932        if per_series:
933            scores = dict_to_dataframe_series(scores, self.series_names)
934        else:
935            scores = pd.DataFrame(scores)
936
937        try:  # case per_series, can't be sorted
938            scores = scores.sort_values(
939                by=self.sort_by, ascending=True
940            ).set_index("Model")
941
942            self.best_model_ = self.models_[scores.index[0]]
943        except Exception as e:
944            pass
945
946        if self.predictions is True:
947            return scores, predictions
948
949        return scores

Fit Regression algorithms to X_train, predict and score on X_test.

Parameters:

X_train: array-like or data frame,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test: array-like or data frame,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

xreg: array-like, optional (default=None)
    Additional (external) regressors to be passed to self.obj
    xreg must be in 'increasing' order (most recent observations last)

per_series: bool, optional (default=False)
    When set to True, the metrics are computed series by series.

**kwargs: dict, optional (default=None)
    Additional parameters to be passed to `fit` method of `obj`.

Returns:

scores: Pandas DataFrame
    Returns metrics of all the models in a Pandas DataFrame.

predictions: Pandas DataFrame
    Returns predictions of all the models in a Pandas DataFrame.
def provide_models(self, X_train, X_test):
963    def provide_models(self, X_train, X_test):
964        """
965        This function returns all the model objects trained in fit function.
966        If fit is not called already, then we call fit and then return the models.
967
968        Parameters:
969
970            X_train : array-like,
971                Training vectors, where rows is the number of samples
972                and columns is the number of features.
973
974            X_test : array-like,
975                Testing vectors, where rows is the number of samples
976                and columns is the number of features.
977
978        Returns:
979
980            models: dict-object,
981                Returns a dictionary with each model pipeline as value
982                with key as name of models.
983
984        """
985        if self.h is None:
986            if len(self.models_.keys()) == 0:
987                self.fit(X_train, X_test)
988        else:
989            if len(self.models_.keys()) == 0:
990                if isinstance(X_test, pd.DataFrame):
991                    self.fit(X_train, X_test.iloc[0: self.h, :])
992                else:
993                    self.fit(X_train, X_test[0: self.h, :])
994
995        return self.models_

Return all the model objects trained by `fit`. If `fit` has not been called yet, it is called first and the resulting models are returned.

Parameters:

X_train : array-like,
    Training vectors, where rows is the number of samples
    and columns is the number of features.

X_test : array-like,
    Testing vectors, where rows is the number of samples
    and columns is the number of features.

Returns:

models: dict-object,
    Returns a dictionary with each model pipeline as value
    with key as name of models.
class MLARCH:
 10class MLARCH:
 11    """Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)
 12
 13    Parameters
 14    ----------
 15    model_mean : object
 16        Model for mean component
 17    model_sigma : object
 18        Model for volatility component (sklearn regressor)
 19    model_residuals : object
 20        Model for standardized residuals
 21    lags_vol : int, default=10
 22        Number of lags for squared residuals in volatility model
 23    """
 24
 25    def __init__(self, model_mean, model_sigma, model_residuals, lags_vol=10):
 26        self.model_mean = model_mean
 27        self.model_sigma = model_sigma
 28        self.model_residuals = model_residuals
 29        self.lags_vol = lags_vol
 30
 31    def _create_lags(self, y, lags):
 32        """Create lagged feature matrix"""
 33        n = len(y)
 34        if n <= lags:
 35            raise ValueError(f"Series length {n} must be > lags {lags}")
 36        X = np.zeros((n - lags, lags))
 37        for i in range(lags):
 38            X[:, i] = y[i: (n - lags + i)]
 39        return X
 40
 41    def fit(self, y, **kwargs):
 42        """Fit the MLARCH model
 43
 44        Parameters
 45        ----------
 46        y : array-like
 47            Target time series (should be stationary, e.g., returns)
 48
 49        Returns
 50        -------
 51        self
 52        """
 53        # Format input
 54        if isinstance(y, (pd.Series, pd.DataFrame)):
 55            y = y.values
 56        y = y.ravel()
 57
 58        if len(y) < self.lags_vol + 20:
 59            raise ValueError(f"Need at least {self.lags_vol + 20} observations")
 60
 61        # Step 1: Fit mean model
 62        self.model_mean.fit(y.reshape(-1, 1))
 63        mean_residuals = self.model_mean.residuals_.ravel()
 64
 65        # Step 2: Fit ARCH volatility model on lagged squared residuals
 66        resid_squared = mean_residuals**2
 67        X_vol = self._create_lags(resid_squared, self.lags_vol)
 68        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)
 69
 70        self.model_sigma.fit(X_vol, y_vol)
 71
 72        # Get fitted volatility
 73        fitted_log_sigma = self.model_sigma.predict(X_vol)
 74        fitted_sigma = np.exp(fitted_log_sigma)
 75
 76        # Step 3: Compute standardized residuals with proper scaling
 77        standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt(
 78            fitted_sigma
 79        )
 80
 81        # Enforce zero mean and unit variance
 82        self.z_mean_ = np.mean(standardized_residuals)
 83        self.z_std_ = np.std(standardized_residuals)
 84        standardized_residuals = (
 85            standardized_residuals - self.z_mean_
 86        ) / self.z_std_
 87
 88        # Step 4: Fit residuals model
 89        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))
 90
 91        # Store for prediction
 92        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]
 93
 94        # Store diagnostics
 95        self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma))
 96        self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma))
 97
 98        return self
 99
100    def predict(self, h=5, level=95, return_sims=False):
101        """Predict future values
102
103        Parameters
104        ----------
105        h : int
106            Forecast horizon
107        level : int
108            Confidence level for prediction intervals
109        return_sims : bool
110            If True, return full simulation paths
111
112        Returns
113        -------
114        DescribeResult
115            Named tuple with mean, sims, lower, upper
116        """
117        DescribeResult = namedtuple(
118            "DescribeResult", ("mean", "sims", "lower", "upper")
119        )
120
121        # Get mean forecast
122        mean_forecast = self.model_mean.predict(h=h).values.ravel()
123
124        # Recursive ARCH volatility forecasting
125        sigma_forecast = np.zeros(h)
126        current_lags = self.last_residuals_squared_.copy()
127
128        for i in range(h):
129            X_t = current_lags.reshape(1, -1)
130            log_sigma_t = self.model_sigma.predict(X_t)[0]
131            sigma_forecast[i] = np.exp(log_sigma_t)
132            # Update lags with predicted variance
133            current_lags = np.append(current_lags[1:], sigma_forecast[i])
134
135        # Predict standardized residuals and rescale
136        z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel()
137        z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_
138
139        # Combine: μ + z × σ
140        point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast)
141
142        # Generate prediction intervals
143        sims = None
144        if return_sims:
145            preds_z_for_sims = self.model_residuals.predict(h=h)
146            if hasattr(preds_z_for_sims, "sims") and isinstance(
147                preds_z_for_sims.sims, pd.DataFrame
148            ):
149                sims_z_normalized = preds_z_for_sims.sims
150                n_sims = sims_z_normalized.shape[1]
151
152                sims = np.zeros((h, n_sims))
153                for sim_idx in range(n_sims):
154                    # Rescale simulations
155                    z_sim = (
156                        sims_z_normalized.iloc[:, sim_idx].values * self.z_std_
157                        + self.z_mean_
158                    )
159                    sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt(
160                        sigma_forecast
161                    )
162
163                alpha = 1 - level / 100
164                lower_bound = np.quantile(sims, alpha / 2, axis=1)
165                upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
166            else:
167                # Fallback to Gaussian
168                z_score = norm.ppf(1 - (1 - level / 100) / 2)
169                margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
170                lower_bound = point_forecast - margin
171                upper_bound = point_forecast + margin
172        else:
173            # Gaussian intervals with proper scaling
174            z_score = norm.ppf(1 - (1 - level / 100) / 2)
175            margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
176            lower_bound = point_forecast - margin
177            upper_bound = point_forecast + margin
178
179        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)

Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)

Parameters

model_mean : object
    Model for mean component
model_sigma : object
    Model for volatility component (sklearn regressor)
model_residuals : object
    Model for standardized residuals
lags_vol : int, default=10
    Number of lags for squared residuals in volatility model

def fit(self, y, **kwargs):
41    def fit(self, y, **kwargs):
42        """Fit the MLARCH model
43
44        Parameters
45        ----------
46        y : array-like
47            Target time series (should be stationary, e.g., returns)
48
49        Returns
50        -------
51        self
52        """
53        # Format input
54        if isinstance(y, (pd.Series, pd.DataFrame)):
55            y = y.values
56        y = y.ravel()
57
58        if len(y) < self.lags_vol + 20:
59            raise ValueError(f"Need at least {self.lags_vol + 20} observations")
60
61        # Step 1: Fit mean model
62        self.model_mean.fit(y.reshape(-1, 1))
63        mean_residuals = self.model_mean.residuals_.ravel()
64
65        # Step 2: Fit ARCH volatility model on lagged squared residuals
66        resid_squared = mean_residuals**2
67        X_vol = self._create_lags(resid_squared, self.lags_vol)
68        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)
69
70        self.model_sigma.fit(X_vol, y_vol)
71
72        # Get fitted volatility
73        fitted_log_sigma = self.model_sigma.predict(X_vol)
74        fitted_sigma = np.exp(fitted_log_sigma)
75
76        # Step 3: Compute standardized residuals with proper scaling
77        standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt(
78            fitted_sigma
79        )
80
81        # Enforce zero mean and unit variance
82        self.z_mean_ = np.mean(standardized_residuals)
83        self.z_std_ = np.std(standardized_residuals)
84        standardized_residuals = (
85            standardized_residuals - self.z_mean_
86        ) / self.z_std_
87
88        # Step 4: Fit residuals model
89        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))
90
91        # Store for prediction
92        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]
93
94        # Store diagnostics
95        self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma))
96        self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma))
97
98        return self

Fit the MLARCH model

Parameters

y : array-like Target time series (should be stationary, e.g., returns)

Returns

self

def predict(self, h=5, level=95, return_sims=False):
100    def predict(self, h=5, level=95, return_sims=False):
101        """Predict future values
102
103        Parameters
104        ----------
105        h : int
106            Forecast horizon
107        level : int
108            Confidence level for prediction intervals
109        return_sims : bool
110            If True, return full simulation paths
111
112        Returns
113        -------
114        DescribeResult
115            Named tuple with mean, sims, lower, upper
116        """
117        DescribeResult = namedtuple(
118            "DescribeResult", ("mean", "sims", "lower", "upper")
119        )
120
121        # Get mean forecast
122        mean_forecast = self.model_mean.predict(h=h).values.ravel()
123
124        # Recursive ARCH volatility forecasting
125        sigma_forecast = np.zeros(h)
126        current_lags = self.last_residuals_squared_.copy()
127
128        for i in range(h):
129            X_t = current_lags.reshape(1, -1)
130            log_sigma_t = self.model_sigma.predict(X_t)[0]
131            sigma_forecast[i] = np.exp(log_sigma_t)
132            # Update lags with predicted variance
133            current_lags = np.append(current_lags[1:], sigma_forecast[i])
134
135        # Predict standardized residuals and rescale
136        z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel()
137        z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_
138
139        # Combine: μ + z × σ
140        point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast)
141
142        # Generate prediction intervals
143        sims = None
144        if return_sims:
145            preds_z_for_sims = self.model_residuals.predict(h=h)
146            if hasattr(preds_z_for_sims, "sims") and isinstance(
147                preds_z_for_sims.sims, pd.DataFrame
148            ):
149                sims_z_normalized = preds_z_for_sims.sims
150                n_sims = sims_z_normalized.shape[1]
151
152                sims = np.zeros((h, n_sims))
153                for sim_idx in range(n_sims):
154                    # Rescale simulations
155                    z_sim = (
156                        sims_z_normalized.iloc[:, sim_idx].values * self.z_std_
157                        + self.z_mean_
158                    )
159                    sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt(
160                        sigma_forecast
161                    )
162
163                alpha = 1 - level / 100
164                lower_bound = np.quantile(sims, alpha / 2, axis=1)
165                upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
166            else:
167                # Fallback to Gaussian
168                z_score = norm.ppf(1 - (1 - level / 100) / 2)
169                margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
170                lower_bound = point_forecast - margin
171                upper_bound = point_forecast + margin
172        else:
173            # Gaussian intervals with proper scaling
174            z_score = norm.ppf(1 - (1 - level / 100) / 2)
175            margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
176            lower_bound = point_forecast - margin
177            upper_bound = point_forecast + margin
178
179        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)

Predict future values

Parameters

h : int
    Forecast horizon
level : int
    Confidence level for prediction intervals
return_sims : bool
    If True, return full simulation paths

Returns

DescribeResult Named tuple with mean, sims, lower, upper

class MedianVotingRegressor(sklearn.ensemble._voting.VotingRegressor):
 6class MedianVotingRegressor(VotingRegressor):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Prediction voting regressor for unfitted estimators.

A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.

For a detailed example, refer to :ref:sphx_glr_auto_examples_ensemble_plot_voting_regressor.py.

Read more in the :ref:User Guide <voting_regressor>.

New in version 0.21.

Parameters

estimators : list of (str, estimator) tuples Invoking the fit method on the VotingRegressor will fit clones of those original estimators that will be stored in the class attribute self.estimators_. An estimator can be set to 'drop' using set_params().

*Changed in version 0.21:*
``'drop'`` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.

weights : array-like of shape (n_regressors,), default=None Sequence of weights (float or int) to weight the occurrences of predicted values before averaging. Uses uniform weights if None.

n_jobs : int, default=None The number of jobs to run in parallel for fit. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors. See :term:Glossary <n_jobs> for more details.

verbose : bool, default=False If True, the time elapsed while fitting will be printed as it is completed.

*New in version 0.23.*

Attributes

estimators_ : list of regressors The collection of fitted sub-estimators as defined in estimators that are not 'drop'.

named_estimators_ : ~sklearn.utils.Bunch Attribute to access any fitted sub-estimators by name.

*New in version 0.20.*

n_features_in_ : int Number of features seen during :term:fit. Only defined if the underlying regressor exposes such an attribute when fit.

*New in version 0.24.*

feature_names_in_ : ndarray of shape (n_features_in_,) Names of features seen during :term:fit. Only defined if the underlying estimators expose such an attribute when fit.

*New in version 1.0.*

See Also

VotingClassifier : Soft Voting/Majority Rule classifier.

Examples

>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8  8.4 12.5 17.8 26  34]

In the following example, we drop the 'lr' estimator with ~VotingRegressor.set_params() and fit the remaining two estimators:

>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
 7    def predict(self, X):
 8        """
 9        Predict using the median of the base regressors' predictions.
10
11        Parameters:
12        X (array-like): Feature matrix for predictions.
13
14        Returns:
15        y_pred (array): Median of predictions from the base regressors.
16        """
17        predictions = np.asarray(
18            [regressor.predict(X) for regressor in self.estimators_]
19        )
20        return np.median(predictions, axis=0)

Predict using the median of the base regressors' predictions.

Parameters: X (array-like): Feature matrix for predictions.

Returns: y_pred (array): Median of predictions from the base regressors.

class MTS(nnetsauce.Base):
  31class MTS(Base):
  32    """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
  33
  34    Parameters:
  35
  36        obj: object.
  37            any object containing a method fit (obj.fit()) and a method predict
  38            (obj.predict()).
  39
  40        n_hidden_features: int.
  41            number of nodes in the hidden layer.
  42
  43        activation_name: str.
  44            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
  45
  46        a: float.
  47            hyperparameter for 'prelu' or 'elu' activation function.
  48
  49        nodes_sim: str.
  50            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
  51            'uniform'.
  52
  53        bias: boolean.
  54            indicates if the hidden layer contains a bias term (True) or not
  55            (False).
  56
  57        dropout: float.
  58            regularization parameter; (random) percentage of nodes dropped out
  59            of the training.
  60
  61        direct_link: boolean.
  62            indicates if the original predictors are included (True) in model's fitting or not (False).
  63
  64        n_clusters: int.
  65            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
  66
  67        cluster_encode: bool.
  68            defines how the variable containing clusters is treated (default is one-hot)
  69            if `False`, then labels are used, without one-hot encoding.
  70
  71        type_clust: str.
  72            type of clustering method: currently k-means ('kmeans') or Gaussian
  73            Mixture Model ('gmm').
  74
  75        type_scaling: a tuple of 3 strings.
  76            scaling methods for inputs, hidden layer, and clustering respectively
  77            (and when relevant).
  78            Currently available: standardization ('std') or MinMax scaling ('minmax').
  79
  80        lags: int.
  81            number of lags used for each time series.
  82            If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
  83
  84        type_pi: str.
  85            type of prediction interval; currently:
  86            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
  87            - "quantile": use model-agnostic quantile regression under the hood
  88            - "kde": based on Kernel Density Estimation of in-sample residuals
  89            - "bootstrap": based on independent bootstrap of in-sample residuals
  90            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
  91            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
  92            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
  93            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
  94            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
  95            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
  96            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
  97            - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
  98            'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
  99            - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
 100            'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
 101            - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
 102            'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
 103
 104        level: int.
 105            level of confidence for `type_pi == 'quantile'` (default is `95`)
 106
 107        block_size: int.
 108            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
 109            Default is round(3.15*(n_residuals^1/3))
 110
 111        replications: int.
 112            number of replications (if needed, for predictive simulation). Default is 'None'.
 113
 114        kernel: str.
 115            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
 116
 117        agg: str.
 118            either "mean" or "median" for simulation of bootstrap aggregating
 119
 120        seed: int.
 121            reproducibility seed for nodes_sim=='uniform' or predictive simulation.
 122
 123        backend: str.
 124            "cpu" or "gpu" or "tpu".
 125
 126        verbose: int.
 127            0: not printing; 1: printing
 128
 129        show_progress: bool.
 130            True: progress bar when fitting each series; False: no progress bar when fitting each series
 131
 132    Attributes:
 133
 134        fit_objs_: dict
 135            objects adjusted to each individual time series
 136
 137        y_: {array-like}
 138            MTS responses (most recent observations first)
 139
 140        X_: {array-like}
 141            MTS lags
 142
 143        xreg_: {array-like}
 144            external regressors
 145
 146        y_means_: dict
 147            a dictionary of each series mean values
 148
 149        preds_: {array-like}
 150            successive model predictions
 151
 152        preds_std_: {array-like}
 153            standard deviation around the predictions for Bayesian base learners (`obj`)
 154
 155        gaussian_preds_std_: {array-like}
 156            standard deviation around the predictions for `type_pi='gaussian'`
 157
 158        return_std_: boolean
 159            return uncertainty or not (set in predict)
 160
 161        df_: data frame
 162            the input data frame, in case a data.frame is provided to `fit`
 163
 164        n_obs_: int
 165            number of time series observations (number of rows for multivariate)
 166
 167        level_: int
 168            level of confidence for prediction intervals (default is 95)
 169
 170        residuals_: {array-like}
 171            in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
 172            (for `type_pi` in conformal prediction)
 173
 174        residuals_sims_: tuple of {array-like}
 175            simulations of in-sample residuals (for `type_pi` not conformal prediction) or
 176            calibrated residuals (for `type_pi` in conformal prediction)
 177
 178        kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
 179
 180        residuals_std_dev_: residuals standard deviation
 181
 182    Examples:
 183
 184    Example 1:
 185
 186    ```python
 187    import nnetsauce as ns
 188    import numpy as np
 189    from sklearn import linear_model
 190    np.random.seed(123)
 191
 192    M = np.random.rand(10, 3)
 193    M[:,0] = 10*M[:,0]
 194    M[:,2] = 25*M[:,2]
 195    print(M)
 196
 197    # Adjust Bayesian Ridge
 198    regr4 = linear_model.BayesianRidge()
 199    obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
 200    obj_MTS.fit(M)
 201    print(obj_MTS.predict())
 202
 203    # with credible intervals
 204    print(obj_MTS.predict(return_std=True, level=80))
 205
 206    print(obj_MTS.predict(return_std=True, level=95))
 207    ```
 208
 209    Example 2:
 210
 211    ```python
 212    import nnetsauce as ns
 213    import numpy as np
 214    from sklearn import linear_model
 215
 216    dataset = {
 217    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
 218    'series1' : [34, 30, 35.6, 33.3, 38.1],
 219    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
 220    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
 221    df = pd.DataFrame(dataset).set_index('date')
 222    print(df)
 223
 224    # Adjust Bayesian Ridge
 225    regr5 = linear_model.BayesianRidge()
 226    obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
 227    obj_MTS.fit(df)
 228    print(obj_MTS.predict())
 229
 230    # with credible intervals
 231    print(obj_MTS.predict(return_std=True, level=80))
 232
 233    print(obj_MTS.predict(return_std=True, level=95))
 234    ```
 235    """
 236
 237    # construct the object -----
 238
 239    def __init__(
 240        self,
 241        obj,
 242        n_hidden_features=5,
 243        activation_name="relu",
 244        a=0.01,
 245        nodes_sim="sobol",
 246        bias=True,
 247        dropout=0,
 248        direct_link=True,
 249        n_clusters=2,
 250        cluster_encode=True,
 251        type_clust="kmeans",
 252        type_scaling=("std", "std", "std"),
 253        lags=1,
 254        type_pi="kde",
 255        level=95,
 256        block_size=None,
 257        replications=None,
 258        kernel="gaussian",
 259        agg="mean",
 260        seed=123,
 261        backend="cpu",
 262        verbose=0,
 263        show_progress=True,
 264    ):
 265        super().__init__(
 266            n_hidden_features=n_hidden_features,
 267            activation_name=activation_name,
 268            a=a,
 269            nodes_sim=nodes_sim,
 270            bias=bias,
 271            dropout=dropout,
 272            direct_link=direct_link,
 273            n_clusters=n_clusters,
 274            cluster_encode=cluster_encode,
 275            type_clust=type_clust,
 276            type_scaling=type_scaling,
 277            seed=seed,
 278            backend=backend,
 279        )
 280
 281        # Add validation for lags parameter
 282        if isinstance(lags, str):
 283            assert lags in (
 284                "AIC",
 285                "AICc",
 286                "BIC",
 287            ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'"
 288        else:
 289            assert (
 290                int(lags) == lags
 291            ), "if numeric, lags parameter should be an integer"
 292
 293        self.obj = obj
 294        self.n_series = None
 295        self.lags = lags
 296        self.type_pi = type_pi
 297        self.level = level
 298        if self.type_pi == "quantile":
 299            self.obj = QuantileRegressor(
 300                self.obj, level=self.level, scoring="conformal"
 301            )
 302        self.block_size = block_size
 303        self.replications = replications
 304        self.kernel = kernel
 305        self.agg = agg
 306        self.verbose = verbose
 307        self.show_progress = show_progress
 308        self.series_names = ["series0"]
 309        self.input_dates = None
 310        self.quantiles = None
 311        self.fit_objs_ = {}
 312        self.y_ = None  # MTS responses (most recent observations first)
 313        self.X_ = None  # MTS lags
 314        self.xreg_ = None
 315        self.y_means_ = {}
 316        self.mean_ = None
 317        self.median_ = None
 318        self.upper_ = None
 319        self.lower_ = None
 320        self.output_dates_ = None
 321        self.preds_std_ = []
 322        self.gaussian_preds_std_ = None
 323        self.alpha_ = None
 324        self.return_std_ = None
 325        self.df_ = None
 326        self.residuals_ = []
 327        self.abs_calib_residuals_ = None
 328        self.calib_residuals_quantile_ = None
 329        self.residuals_sims_ = None
 330        self.kde_ = None
 331        self.sims_ = None
 332        self.residuals_std_dev_ = None
 333        self.n_obs_ = None
 334        self.level_ = None
 335        self.init_n_series_ = None
 336
 337    def fit(self, X, xreg=None, **kwargs):
 338        """Fit MTS model to training data X, with optional regressors xreg
 339
 340        Parameters:
 341
 342        X: {array-like}, shape = [n_samples, n_features]
 343            Training time series, where n_samples is the number
 344            of samples and n_features is the number of features;
 345            X must be in increasing order (most recent observations last)
 346
 347        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 348            Additional (external) regressors to be passed to self.obj
 349            xreg must be in 'increasing' order (most recent observations last)
 350
 351        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 352
 353        Returns:
 354
 355        self: object
 356        """
 357        try:
 358            self.init_n_series_ = X.shape[1]
 359        except IndexError as e:
 360            self.init_n_series_ = 1
 361
 362        # Automatic lag selection if requested
 363        if isinstance(self.lags, str):
 364            max_lags = min(25, X.shape[0] // 4)
 365            best_ic = float("inf")
 366            best_lags = 1
 367
 368            if self.verbose:
 369                print(
 370                    f"\nSelecting optimal number of lags using {self.lags}..."
 371                )
 372                iterator = tqdm(range(1, max_lags + 1))
 373            else:
 374                iterator = range(1, max_lags + 1)
 375
 376            for lag in iterator:
 377                # Convert DataFrame to numpy array before reversing
 378                if isinstance(X, pd.DataFrame):
 379                    X_values = X.values[::-1]
 380                else:
 381                    X_values = X[::-1]
 382
 383                # Try current lag value
 384                if self.init_n_series_ > 1:
 385                    mts_input = ts.create_train_inputs(X_values, lag)
 386                else:
 387                    mts_input = ts.create_train_inputs(
 388                        X_values.reshape(-1, 1), lag
 389                    )
 390
 391                # Cook training set and fit model
 392                dummy_y, scaled_Z = self.cook_training_set(
 393                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 394                )
 395                residuals_ = []
 396
 397                for i in range(self.init_n_series_):
 398                    y_mean = np.mean(mts_input[0][:, i])
 399                    centered_y_i = mts_input[0][:, i] - y_mean
 400                    self.obj.fit(X=scaled_Z, y=centered_y_i)
 401                    residuals_.append(
 402                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 403                    )
 404
 405                self.residuals_ = np.asarray(residuals_).T
 406                ic = self._compute_information_criterion(
 407                    curr_lags=lag, criterion=self.lags
 408                )
 409
 410                if self.verbose:
 411                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 412
 413                if ic < best_ic:
 414                    best_ic = ic
 415                    best_lags = lag
 416
 417            if self.verbose:
 418                print(
 419                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 420                )
 421
 422            self.lags = best_lags
 423
 424        self.input_dates = None
 425        self.df_ = None
 426
 427        if isinstance(X, pd.DataFrame) is False:
 428            # input data set is a numpy array
 429            if xreg is None:
 430                X = pd.DataFrame(X)
 431                self.series_names = [
 432                    "series" + str(i) for i in range(X.shape[1])
 433                ]
 434            else:
 435                # xreg is not None
 436                X = mo.cbind(X, xreg)
 437                self.xreg_ = xreg
 438
 439        else:  # input data set is a DataFrame with column names
 440            X_index = None
 441            if X.index is not None:
 442                X_index = X.index
 443            if xreg is None:
 444                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 445            else:
 446                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 447                self.xreg_ = xreg
 448            if X_index is not None:
 449                X.index = X_index
 450            self.series_names = X.columns.tolist()
 451
 452        if isinstance(X, pd.DataFrame):
 453            if self.df_ is None:
 454                self.df_ = X
 455                X = X.values
 456            else:
 457                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 458                frequency = pd.infer_freq(input_dates_prev)
 459                self.df_ = pd.concat([self.df_, X], axis=0)
 460                self.input_dates = pd.date_range(
 461                    start=input_dates_prev[0],
 462                    periods=len(input_dates_prev) + X.shape[0],
 463                    freq=frequency,
 464                ).values.tolist()
 465                self.df_.index = self.input_dates
 466                X = self.df_.values
 467            self.df_.columns = self.series_names
 468        else:
 469            if self.df_ is None:
 470                self.df_ = pd.DataFrame(X, columns=self.series_names)
 471            else:
 472                self.df_ = pd.concat(
 473                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 474                    axis=0,
 475                )
 476
 477        self.input_dates = ts.compute_input_dates(self.df_)
 478
 479        try:
 480            # multivariate time series
 481            n, p = X.shape
 482        except:
 483            # univariate time series
 484            n = X.shape[0]
 485            p = 1
 486        self.n_obs_ = n
 487
 488        rep_1_n = np.repeat(1, n)
 489
 490        self.y_ = None
 491        self.X_ = None
 492        self.n_series = p
 493        self.fit_objs_.clear()
 494        self.y_means_.clear()
 495        residuals_ = []
 496        self.residuals_ = None
 497        self.residuals_sims_ = None
 498        self.kde_ = None
 499        self.sims_ = None
 500        self.scaled_Z_ = None
 501        self.centered_y_is_ = []
 502
 503        if self.init_n_series_ > 1:
 504            # multivariate time series
 505            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 506        else:
 507            # univariate time series
 508            mts_input = ts.create_train_inputs(
 509                X.reshape(-1, 1)[::-1], self.lags
 510            )
 511
 512        self.y_ = mts_input[0]
 513
 514        self.X_ = mts_input[1]
 515
 516        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 517
 518        self.scaled_Z_ = scaled_Z
 519
 520        # loop on all the time series and adjust self.obj.fit
 521        if self.verbose > 0:
 522            print(
 523                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 524            )
 525
 526        if self.show_progress is True:
 527            iterator = tqdm(range(self.init_n_series_))
 528        else:
 529            iterator = range(self.init_n_series_)
 530
 531        if self.type_pi in (
 532            "gaussian",
 533            "kde",
 534            "bootstrap",
 535            "block-bootstrap",
 536        ) or self.type_pi.startswith("vine"):
 537            for i in iterator:
 538                y_mean = np.mean(self.y_[:, i])
 539                self.y_means_[i] = y_mean
 540                centered_y_i = self.y_[:, i] - y_mean
 541                self.centered_y_is_.append(centered_y_i)
 542                self.obj.fit(X=scaled_Z, y=centered_y_i)
 543                self.fit_objs_[i] = deepcopy(self.obj)
 544                residuals_.append(
 545                    (
 546                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
 547                    ).tolist()
 548                )
 549
 550        if self.type_pi == "quantile":
 551            for i in iterator:
 552                y_mean = np.mean(self.y_[:, i])
 553                self.y_means_[i] = y_mean
 554                centered_y_i = self.y_[:, i] - y_mean
 555                self.centered_y_is_.append(centered_y_i)
 556                self.obj.fit(X=scaled_Z, y=centered_y_i)
 557                self.fit_objs_[i] = deepcopy(self.obj)
 558
 559        if self.type_pi.startswith("scp"):
 560            # split conformal prediction
 561            for i in iterator:
 562                n_y = self.y_.shape[0]
 563                n_y_half = n_y // 2
 564                first_half_idx = range(0, n_y_half)
 565                second_half_idx = range(n_y_half, n_y)
 566                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 567                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 568                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
 569                # calibrated residuals actually
 570                residuals_.append(
 571                    (
 572                        self.y_[second_half_idx, i]
 573                        - (
 574                            y_mean_temp
 575                            + self.obj.predict(scaled_Z[second_half_idx, :])
 576                        )
 577                    ).tolist()
 578                )
 579                # fit on the second half
 580                y_mean = np.mean(self.y_[second_half_idx, i])
 581                self.y_means_[i] = y_mean
 582                centered_y_i = self.y_[second_half_idx, i] - y_mean
 583                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
 584                self.fit_objs_[i] = deepcopy(self.obj)
 585
 586        self.residuals_ = np.asarray(residuals_).T
 587
 588        if self.type_pi == "gaussian":
 589            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 590
 591        if self.type_pi.startswith("scp2"):
 592            # Calculate mean and standard deviation for each column
 593            data_mean = np.mean(self.residuals_, axis=0)
 594            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 595            # Center and scale the array using broadcasting
 596            self.residuals_ = (
 597                self.residuals_ - data_mean[np.newaxis, :]
 598            ) / self.residuals_std_dev_[np.newaxis, :]
 599
 600        if self.replications != None and "kde" in self.type_pi:
 601            if self.verbose > 0:
 602                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 603            assert self.kernel in (
 604                "gaussian",
 605                "tophat",
 606            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 607            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 608            grid = GridSearchCV(
 609                KernelDensity(kernel=self.kernel, **kwargs),
 610                param_grid=kernel_bandwidths,
 611            )
 612            grid.fit(self.residuals_)
 613
 614            if self.verbose > 0:
 615                print(
 616                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 617                )
 618
 619            self.kde_ = grid.best_estimator_
 620
 621        return self
 622
 623    def partial_fit(self, X, xreg=None, **kwargs):
 624        """partial_fit MTS model to training data X, with optional regressors xreg
 625
 626        Parameters:
 627
 628        X: {array-like}, shape = [n_samples, n_features]
 629            Training time series, where n_samples is the number
 630            of samples and n_features is the number of features;
 631            X must be in increasing order (most recent observations last)
 632
 633        xreg: {array-like}, shape = [n_samples, n_features_xreg]
 634            Additional (external) regressors to be passed to self.obj
 635            xreg must be in 'increasing' order (most recent observations last)
 636
 637        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
 638
 639        Returns:
 640
 641        self: object
 642        """
 643        try:
 644            self.init_n_series_ = X.shape[1]
 645        except IndexError as e:
 646            self.init_n_series_ = 1
 647
 648        # Automatic lag selection if requested
 649        if isinstance(self.lags, str):
 650            max_lags = min(25, X.shape[0] // 4)
 651            best_ic = float("inf")
 652            best_lags = 1
 653
 654            if self.verbose:
 655                print(
 656                    f"\nSelecting optimal number of lags using {self.lags}..."
 657                )
 658                iterator = tqdm(range(1, max_lags + 1))
 659            else:
 660                iterator = range(1, max_lags + 1)
 661
 662            for lag in iterator:
 663                # Convert DataFrame to numpy array before reversing
 664                if isinstance(X, pd.DataFrame):
 665                    X_values = X.values[::-1]
 666                else:
 667                    X_values = X[::-1]
 668
 669                # Try current lag value
 670                if self.init_n_series_ > 1:
 671                    mts_input = ts.create_train_inputs(X_values, lag)
 672                else:
 673                    mts_input = ts.create_train_inputs(
 674                        X_values.reshape(-1, 1), lag
 675                    )
 676
 677                # Cook training set and partial_fit model
 678                dummy_y, scaled_Z = self.cook_training_set(
 679                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 680                )
 681                residuals_ = []
 682
 683                for i in range(self.init_n_series_):
 684                    y_mean = np.mean(mts_input[0][:, i])
 685                    centered_y_i = mts_input[0][:, i] - y_mean
 686                    self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 687                    residuals_.append(
 688                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
 689                    )
 690
 691                self.residuals_ = np.asarray(residuals_).T
 692                ic = self._compute_information_criterion(
 693                    curr_lags=lag, criterion=self.lags
 694                )
 695
 696                if self.verbose:
 697                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 698
 699                if ic < best_ic:
 700                    best_ic = ic
 701                    best_lags = lag
 702
 703            if self.verbose:
 704                print(
 705                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 706                )
 707
 708            self.lags = best_lags
 709
 710        self.input_dates = None
 711        self.df_ = None
 712
 713        if isinstance(X, pd.DataFrame) is False:
 714            # input data set is a numpy array
 715            if xreg is None:
 716                X = pd.DataFrame(X)
 717                if len(X.shape) > 1:
 718                    self.series_names = [
 719                        "series" + str(i) for i in range(X.shape[1])
 720                    ]
 721                else:
 722                    self.series_names = ["series0"]
 723            else:
 724                # xreg is not None
 725                X = mo.cbind(X, xreg)
 726                self.xreg_ = xreg
 727
 728        else:  # input data set is a DataFrame with column names
 729            X_index = None
 730            if X.index is not None:
 731                X_index = X.index
 732            if xreg is None:
 733                X = copy.deepcopy(mo.convert_df_to_numeric(X))
 734            else:
 735                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
 736                self.xreg_ = xreg
 737            if X_index is not None:
 738                X.index = X_index
 739            self.series_names = X.columns.tolist()
 740
 741        if isinstance(X, pd.DataFrame):
 742            if self.df_ is None:
 743                self.df_ = X
 744                X = X.values
 745            else:
 746                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
 747                frequency = pd.infer_freq(input_dates_prev)
 748                self.df_ = pd.concat([self.df_, X], axis=0)
 749                self.input_dates = pd.date_range(
 750                    start=input_dates_prev[0],
 751                    periods=len(input_dates_prev) + X.shape[0],
 752                    freq=frequency,
 753                ).values.tolist()
 754                self.df_.index = self.input_dates
 755                X = self.df_.values
 756            self.df_.columns = self.series_names
 757        else:
 758            if self.df_ is None:
 759                self.df_ = pd.DataFrame(X, columns=self.series_names)
 760            else:
 761                self.df_ = pd.concat(
 762                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
 763                    axis=0,
 764                )
 765
 766        self.input_dates = ts.compute_input_dates(self.df_)
 767
 768        try:
 769            # multivariate time series
 770            n, p = X.shape
 771        except:
 772            # univariate time series
 773            n = X.shape[0]
 774            p = 1
 775        self.n_obs_ = n
 776
 777        rep_1_n = np.repeat(1, n)
 778
 779        self.y_ = None
 780        self.X_ = None
 781        self.n_series = p
 782        self.fit_objs_.clear()
 783        self.y_means_.clear()
 784        residuals_ = []
 785        self.residuals_ = None
 786        self.residuals_sims_ = None
 787        self.kde_ = None
 788        self.sims_ = None
 789        self.scaled_Z_ = None
 790        self.centered_y_is_ = []
 791
 792        if self.init_n_series_ > 1:
 793            # multivariate time series
 794            mts_input = ts.create_train_inputs(X[::-1], self.lags)
 795        else:
 796            # univariate time series
 797            mts_input = ts.create_train_inputs(
 798                X.reshape(-1, 1)[::-1], self.lags
 799            )
 800
 801        self.y_ = mts_input[0]
 802
 803        self.X_ = mts_input[1]
 804
 805        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
 806
 807        self.scaled_Z_ = scaled_Z
 808
 809        # loop on all the time series and adjust self.obj.partial_fit
 810        if self.verbose > 0:
 811            print(
 812                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
 813            )
 814
 815        if self.show_progress is True:
 816            iterator = tqdm(range(self.init_n_series_))
 817        else:
 818            iterator = range(self.init_n_series_)
 819
 820        if self.type_pi in (
 821            "gaussian",
 822            "kde",
 823            "bootstrap",
 824            "block-bootstrap",
 825        ) or self.type_pi.startswith("vine"):
 826            for i in iterator:
 827                y_mean = np.mean(self.y_[:, i])
 828                self.y_means_[i] = y_mean
 829                centered_y_i = self.y_[:, i] - y_mean
 830                self.centered_y_is_.append(centered_y_i)
 831                self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 832                self.fit_objs_[i] = deepcopy(self.obj)
 833                residuals_.append(
 834                    (
 835                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
 836                    ).tolist()
 837                )
 838
 839        if self.type_pi == "quantile":
 840            for i in iterator:
 841                y_mean = np.mean(self.y_[:, i])
 842                self.y_means_[i] = y_mean
 843                centered_y_i = self.y_[:, i] - y_mean
 844                self.centered_y_is_.append(centered_y_i)
 845                self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
 846                self.fit_objs_[i] = deepcopy(self.obj)
 847
 848        if self.type_pi.startswith("scp"):
 849            # split conformal prediction
 850            for i in iterator:
 851                n_y = self.y_.shape[0]
 852                n_y_half = n_y // 2
 853                first_half_idx = range(0, n_y_half)
 854                second_half_idx = range(n_y_half, n_y)
 855                y_mean_temp = np.mean(self.y_[first_half_idx, i])
 856                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
 857                self.obj.partial_fit(
 858                    X=scaled_Z[first_half_idx, :], y=centered_y_i_temp
 859                )
 860                # calibrated residuals actually
 861                residuals_.append(
 862                    (
 863                        self.y_[second_half_idx, i]
 864                        - (
 865                            y_mean_temp
 866                            + self.obj.predict(scaled_Z[second_half_idx, :])
 867                        )
 868                    ).tolist()
 869                )
 870                # partial_fit on the second half
 871                y_mean = np.mean(self.y_[second_half_idx, i])
 872                self.y_means_[i] = y_mean
 873                centered_y_i = self.y_[second_half_idx, i] - y_mean
 874                self.obj.partial_fit(
 875                    X=scaled_Z[second_half_idx, :], y=centered_y_i
 876                )
 877                self.fit_objs_[i] = deepcopy(self.obj)
 878
 879        self.residuals_ = np.asarray(residuals_).T
 880
 881        if self.type_pi == "gaussian":
 882            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
 883
 884        if self.type_pi.startswith("scp2"):
 885            # Calculate mean and standard deviation for each column
 886            data_mean = np.mean(self.residuals_, axis=0)
 887            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
 888            # Center and scale the array using broadcasting
 889            self.residuals_ = (
 890                self.residuals_ - data_mean[np.newaxis, :]
 891            ) / self.residuals_std_dev_[np.newaxis, :]
 892
 893        if self.replications != None and "kde" in self.type_pi:
 894            if self.verbose > 0:
 895                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
 896            assert self.kernel in (
 897                "gaussian",
 898                "tophat",
 899            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
 900            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
 901            grid = GridSearchCV(
 902                KernelDensity(kernel=self.kernel, **kwargs),
 903                param_grid=kernel_bandwidths,
 904            )
 905            grid.fit(self.residuals_)
 906
 907            if self.verbose > 0:
 908                print(
 909                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
 910                )
 911
 912            self.kde_ = grid.best_estimator_
 913
 914        return self
 915
 916    def _predict_quantiles(self, h, quantiles, **kwargs):
 917        """Predict arbitrary quantiles from simulated paths."""
 918        # Ensure output dates are set
 919        self.output_dates_, _ = ts.compute_output_dates(self.df_, h)
 920
 921        # Trigger full prediction to generate self.sims_
 922        if not hasattr(self, "sims_") or self.sims_ is None:
 923            _ = self.predict(h=h, level=95, **kwargs)  # Any level triggers sim
 924
 925        result_dict = {}
 926
 927        # Stack simulations: (R, h, n_series)
 928        sims_array = np.stack([sim.values for sim in self.sims_], axis=0)
 929
 930        # Compute quantiles over replication axis
 931        q_values = np.quantile(
 932            sims_array, quantiles, axis=0
 933        )  # (n_q, h, n_series)
 934
 935        for i, q in enumerate(quantiles):
 936            # Clean label: 0.05 → "05", 0.1 → "10", 0.95 → "95"
 937            q_label = (
 938                f"{int(q * 100):02d}"
 939                if (q * 100).is_integer()
 940                else f"{q:.3f}".replace(".", "_")
 941            )
 942            for series_id in range(self.init_n_series_):
 943                series_name = self.series_names[series_id]
 944                col_name = f"quantile_{q_label}_{series_name}"
 945                result_dict[col_name] = q_values[i, :, series_id]
 946
 947        df_return_quantiles = pd.DataFrame(
 948            result_dict, index=self.output_dates_
 949        )
 950
 951        return df_return_quantiles
 952
 953    def predict(self, h=5, level=95, quantiles=None, **kwargs):
 954        """Forecast all the time series, h steps ahead"""
 955
 956        if quantiles is not None:
 957            # Validate
 958            quantiles = np.asarray(quantiles)
 959            if not ((quantiles > 0) & (quantiles < 1)).all():
 960                raise ValueError("quantiles must be between 0 and 1.")
 961            # Delegate to dedicated method
 962            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)
 963
 964        if isinstance(level, list) or isinstance(level, np.ndarray):
 965            # Store results
 966            result_dict = {}
 967            # Loop through alphas and calculate lower/upper for each alpha level
 968            # E.g [0.5, 2.5, 5, 16.5, 25, 50]
 969            for lev in level:
 970                # Get the forecast for this alpha
 971                res = self.predict(h=h, level=lev, **kwargs)
 972                # Adjust index and collect lower/upper bounds
 973                res.lower.index = pd.to_datetime(res.lower.index)
 974                res.upper.index = pd.to_datetime(res.upper.index)
 975                # Loop over each time series (multivariate) and flatten results
 976                if isinstance(res.lower, pd.DataFrame):
 977                    for (
 978                        series
 979                    ) in (
 980                        res.lower.columns
 981                    ):  # Assumes 'lower' and 'upper' have multiple series
 982                        result_dict[f"lower_{lev}_{series}"] = (
 983                            res.lower[series].to_numpy().flatten()
 984                        )
 985                        result_dict[f"upper_{lev}_{series}"] = (
 986                            res.upper[series].to_numpy().flatten()
 987                        )
 988                else:
 989                    for series_id in range(
 990                        self.n_series
 991                    ):  # Assumes 'lower' and 'upper' have multiple series
 992                        result_dict[f"lower_{lev}_{series_id}"] = (
 993                            res.lower[series_id, :].to_numpy().flatten()
 994                        )
 995                        result_dict[f"upper_{lev}_{series_id}"] = (
 996                            res.upper[series_id, :].to_numpy().flatten()
 997                        )
 998            return pd.DataFrame(result_dict, index=self.output_dates_)
 999
1000        # only one prediction interval
1001        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
1002
1003        self.level_ = level
1004
1005        self.return_std_ = False  # do not remove (/!\)
1006
1007        self.mean_ = None  # do not remove (/!\)
1008
1009        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
1010
1011        self.lower_ = None  # do not remove (/!\)
1012
1013        self.upper_ = None  # do not remove (/!\)
1014
1015        self.sims_ = None  # do not remove (/!\)
1016
1017        y_means_ = np.asarray(
1018            [self.y_means_[i] for i in range(self.init_n_series_)]
1019        )
1020
1021        n_features = self.init_n_series_ * self.lags
1022
1023        self.alpha_ = 100 - level
1024
1025        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
1026
1027        if "return_std" in kwargs:  # bayesian forecasting
1028            self.return_std_ = True
1029            self.preds_std_ = []
1030            DescribeResult = namedtuple(
1031                "DescribeResult", ("mean", "lower", "upper")
1032            )  # to be updated
1033
1034        if "return_pi" in kwargs:  # split conformal, without simulation
1035            mean_pi_ = []
1036            lower_pi_ = []
1037            upper_pi_ = []
1038            median_pi_ = []
1039            DescribeResult = namedtuple(
1040                "DescribeResult", ("mean", "lower", "upper")
1041            )  # to be updated
1042
1043        if self.kde_ != None and "kde" in self.type_pi:  # kde
1044            target_cols = self.df_.columns[
1045                : self.init_n_series_
1046            ]  # Get target column names
1047            if self.verbose == 1:
1048                self.residuals_sims_ = tuple(
1049                    self.kde_.sample(
1050                        n_samples=h, random_state=self.seed + 100 * i
1051                    )  # Keep full sample
1052                    for i in tqdm(range(self.replications))
1053                )
1054            elif self.verbose == 0:
1055                self.residuals_sims_ = tuple(
1056                    self.kde_.sample(
1057                        n_samples=h, random_state=self.seed + 100 * i
1058                    )  # Keep full sample
1059                    for i in range(self.replications)
1060                )
1061
1062            # Convert to DataFrames after sampling
1063            self.residuals_sims_ = tuple(
1064                pd.DataFrame(
1065                    sim,  # Keep all columns
1066                    columns=target_cols,  # Use original target column names
1067                    index=self.output_dates_,
1068                )
1069                for sim in self.residuals_sims_
1070            )
1071
1072        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
1073            assert self.replications is not None and isinstance(
1074                self.replications, int
1075            ), "'replications' must be provided and be an integer"
1076            if self.verbose == 1:
1077                self.residuals_sims_ = tuple(
1078                    ts.bootstrap(
1079                        self.residuals_,
1080                        h=h,
1081                        block_size=None,
1082                        seed=self.seed + 100 * i,
1083                    )
1084                    for i in tqdm(range(self.replications))
1085                )
1086            elif self.verbose == 0:
1087                self.residuals_sims_ = tuple(
1088                    ts.bootstrap(
1089                        self.residuals_,
1090                        h=h,
1091                        block_size=None,
1092                        seed=self.seed + 100 * i,
1093                    )
1094                    for i in range(self.replications)
1095                )
1096
1097        if self.type_pi in (
1098            "block-bootstrap",
1099            "scp-block-bootstrap",
1100            "scp2-block-bootstrap",
1101        ):
1102            if self.block_size is None:
1103                self.block_size = int(
1104                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
1105                )
1106
1107            assert self.replications is not None and isinstance(
1108                self.replications, int
1109            ), "'replications' must be provided and be an integer"
1110            if self.verbose == 1:
1111                self.residuals_sims_ = tuple(
1112                    ts.bootstrap(
1113                        self.residuals_,
1114                        h=h,
1115                        block_size=self.block_size,
1116                        seed=self.seed + 100 * i,
1117                    )
1118                    for i in tqdm(range(self.replications))
1119                )
1120            elif self.verbose == 0:
1121                self.residuals_sims_ = tuple(
1122                    ts.bootstrap(
1123                        self.residuals_,
1124                        h=h,
1125                        block_size=self.block_size,
1126                        seed=self.seed + 100 * i,
1127                    )
1128                    for i in range(self.replications)
1129                )
1130
1131        if "vine" in self.type_pi:
1132            if self.verbose == 1:
1133                self.residuals_sims_ = tuple(
1134                    vinecopula_sample(
1135                        x=self.residuals_,
1136                        n_samples=h,
1137                        method=self.type_pi,
1138                        random_state=self.seed + 100 * i,
1139                    )
1140                    for i in tqdm(range(self.replications))
1141                )
1142            elif self.verbose == 0:
1143                self.residuals_sims_ = tuple(
1144                    vinecopula_sample(
1145                        x=self.residuals_,
1146                        n_samples=h,
1147                        method=self.type_pi,
1148                        random_state=self.seed + 100 * i,
1149                    )
1150                    for i in range(self.replications)
1151                )
1152
1153        mean_ = deepcopy(self.mean_)
1154
1155        for i in range(h):
1156            new_obs = ts.reformat_response(mean_, self.lags)
1157            new_X = new_obs.reshape(1, -1)
1158            cooked_new_X = self.cook_test_set(new_X, **kwargs)
1159
1160            if "return_std" in kwargs:
1161                self.preds_std_.append(
1162                    [
1163                        np.asarray(
1164                            self.fit_objs_[i].predict(
1165                                cooked_new_X, return_std=True
1166                            )[1]
1167                        ).item()
1168                        for i in range(self.n_series)
1169                    ]
1170                )
1171
1172            if "return_pi" in kwargs:
1173                for i in range(self.n_series):
1174                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
1175                    mean_pi_.append(preds_pi.mean[0])
1176                    lower_pi_.append(preds_pi.lower[0])
1177                    upper_pi_.append(preds_pi.upper[0])
1178
1179            if self.type_pi != "quantile":
1180                predicted_cooked_new_X = np.asarray(
1181                    [
1182                        np.asarray(
1183                            self.fit_objs_[i].predict(cooked_new_X)
1184                        ).item()
1185                        for i in range(self.init_n_series_)
1186                    ]
1187                )
1188            else:
1189                predicted_cooked_new_X = np.asarray(
1190                    [
1191                        np.asarray(
1192                            self.fit_objs_[i]
1193                            .predict(cooked_new_X, return_pi=True)
1194                            .upper
1195                        ).item()
1196                        for i in range(self.init_n_series_)
1197                    ]
1198                )
1199
1200            preds = np.asarray(y_means_ + predicted_cooked_new_X)
1201
1202            # Create full row with both predictions and external regressors
1203            if self.xreg_ is not None and "xreg" in kwargs:
1204                next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten()
1205                full_row = np.concatenate([preds, next_xreg])
1206            else:
1207                full_row = preds
1208
1209            # Create a new row with same number of columns as mean_
1210            new_row = np.zeros((1, mean_.shape[1]))
1211            new_row[0, : full_row.shape[0]] = full_row
1212
1213            # Maintain the full dimensionality by using vstack instead of rbind
1214            mean_ = np.vstack([new_row, mean_[:-1]])
1215
1216        # Final output should only include the target columns
1217        self.mean_ = pd.DataFrame(
1218            mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][
1219                ::-1
1220            ],
1221            columns=self.df_.columns[: self.init_n_series_],
1222            index=self.output_dates_,
1223        )
1224
1225        # function's return ----------------------------------------------------------------------
1226        if (
1227            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
1228            and (self.type_pi not in ("gaussian", "scp"))
1229        ) or ("vine" in self.type_pi):
1230            if self.replications is None:
1231                return self.mean_.iloc[:, : self.init_n_series_]
1232
1233            # if "return_std" not in kwargs and self.replications is not None
1234            meanf = []
1235            medianf = []
1236            lower = []
1237            upper = []
1238
1239            if "scp2" in self.type_pi:
1240                if self.verbose == 1:
1241                    self.sims_ = tuple(
1242                        (
1243                            self.mean_
1244                            + self.residuals_sims_[i]
1245                            * self.residuals_std_dev_[np.newaxis, :]
1246                            for i in tqdm(range(self.replications))
1247                        )
1248                    )
1249                elif self.verbose == 0:
1250                    self.sims_ = tuple(
1251                        (
1252                            self.mean_
1253                            + self.residuals_sims_[i]
1254                            * self.residuals_std_dev_[np.newaxis, :]
1255                            for i in range(self.replications)
1256                        )
1257                    )
1258            else:
1259                if self.verbose == 1:
1260                    self.sims_ = tuple(
1261                        (
1262                            self.mean_ + self.residuals_sims_[i]
1263                            for i in tqdm(range(self.replications))
1264                        )
1265                    )
1266                elif self.verbose == 0:
1267                    self.sims_ = tuple(
1268                        (
1269                            self.mean_ + self.residuals_sims_[i]
1270                            for i in range(self.replications)
1271                        )
1272                    )
1273
1274            DescribeResult = namedtuple(
1275                "DescribeResult", ("mean", "sims", "lower", "upper")
1276            )
1277            for ix in range(self.init_n_series_):
1278                sims_ix = getsims(self.sims_, ix)
1279                if self.agg == "mean":
1280                    meanf.append(np.mean(sims_ix, axis=1))
1281                else:
1282                    medianf.append(np.median(sims_ix, axis=1))
1283                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
1284                upper.append(
1285                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
1286                )
1287            self.mean_ = pd.DataFrame(
1288                np.asarray(meanf).T,
1289                columns=self.series_names[
1290                    : self.init_n_series_
1291                ],  # self.df_.columns,
1292                index=self.output_dates_,
1293            )
1294
1295            self.lower_ = pd.DataFrame(
1296                np.asarray(lower).T,
1297                columns=self.series_names[
1298                    : self.init_n_series_
1299                ],  # self.df_.columns,
1300                index=self.output_dates_,
1301            )
1302
1303            self.upper_ = pd.DataFrame(
1304                np.asarray(upper).T,
1305                columns=self.series_names[
1306                    : self.init_n_series_
1307                ],  # self.df_.columns,
1308                index=self.output_dates_,
1309            )
1310
1311            try:
1312                self.median_ = pd.DataFrame(
1313                    np.asarray(medianf).T,
1314                    columns=self.series_names[
1315                        : self.init_n_series_
1316                    ],  # self.df_.columns,
1317                    index=self.output_dates_,
1318                )
1319            except Exception as e:
1320                pass
1321
1322            return DescribeResult(
1323                self.mean_, self.sims_, self.lower_, self.upper_
1324            )
1325
1326        if (
1327            (("return_std" in kwargs) or ("return_pi" in kwargs))
1328            and (self.type_pi not in ("gaussian", "scp"))
1329        ) or "vine" in self.type_pi:
1330            DescribeResult = namedtuple(
1331                "DescribeResult", ("mean", "lower", "upper")
1332            )
1333
1334            self.mean_ = pd.DataFrame(
1335                np.asarray(self.mean_),
1336                columns=self.series_names,  # self.df_.columns,
1337                index=self.output_dates_,
1338            )
1339
1340            if "return_std" in kwargs:
1341                self.preds_std_ = np.asarray(self.preds_std_)
1342
1343                self.lower_ = pd.DataFrame(
1344                    self.mean_.values - pi_multiplier * self.preds_std_,
1345                    columns=self.series_names,  # self.df_.columns,
1346                    index=self.output_dates_,
1347                )
1348
1349                self.upper_ = pd.DataFrame(
1350                    self.mean_.values + pi_multiplier * self.preds_std_,
1351                    columns=self.series_names,  # self.df_.columns,
1352                    index=self.output_dates_,
1353                )
1354
1355            if "return_pi" in kwargs:
1356                self.lower_ = pd.DataFrame(
1357                    np.asarray(lower_pi_).reshape(h, self.n_series)
1358                    + y_means_[np.newaxis, :],
1359                    columns=self.series_names,  # self.df_.columns,
1360                    index=self.output_dates_,
1361                )
1362
1363                self.upper_ = pd.DataFrame(
1364                    np.asarray(upper_pi_).reshape(h, self.n_series)
1365                    + y_means_[np.newaxis, :],
1366                    columns=self.series_names,  # self.df_.columns,
1367                    index=self.output_dates_,
1368                )
1369
1370            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1371
1372            if self.xreg_ is not None:
1373                if len(self.xreg_.shape) > 1:
1374                    res2 = mx.tuple_map(
1375                        res,
1376                        lambda x: mo.delete_last_columns(
1377                            x, num_columns=self.xreg_.shape[1]
1378                        ),
1379                    )
1380                else:
1381                    res2 = mx.tuple_map(
1382                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1383                    )
1384                return DescribeResult(res2[0], res2[1], res2[2])
1385
1386            return res
1387
1388        if self.type_pi == "gaussian":
1389            DescribeResult = namedtuple(
1390                "DescribeResult", ("mean", "lower", "upper")
1391            )
1392
1393            self.mean_ = pd.DataFrame(
1394                np.asarray(self.mean_),
1395                columns=self.series_names,  # self.df_.columns,
1396                index=self.output_dates_,
1397            )
1398
1399            # Use Bayesian std if available, otherwise use gaussian residual std
1400            if "return_std" in kwargs and len(self.preds_std_) > 0:
1401                preds_std_to_use = np.asarray(self.preds_std_)
1402            else:
1403                preds_std_to_use = self.gaussian_preds_std_
1404
1405            self.lower_ = pd.DataFrame(
1406                self.mean_.values - pi_multiplier * preds_std_to_use,
1407                columns=self.series_names,  # self.df_.columns,
1408                index=self.output_dates_,
1409            )
1410
1411            self.upper_ = pd.DataFrame(
1412                self.mean_.values + pi_multiplier * preds_std_to_use,
1413                columns=self.series_names,  # self.df_.columns,
1414                index=self.output_dates_,
1415            )
1416
1417            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1418
1419            if self.xreg_ is not None:
1420                if len(self.xreg_.shape) > 1:
1421                    res2 = mx.tuple_map(
1422                        res,
1423                        lambda x: mo.delete_last_columns(
1424                            x, num_columns=self.xreg_.shape[1]
1425                        ),
1426                    )
1427                else:
1428                    res2 = mx.tuple_map(
1429                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1430                    )
1431                return DescribeResult(res2[0], res2[1], res2[2])
1432
1433            return res
1434
1435        if self.type_pi == "quantile":
1436            DescribeResult = namedtuple("DescribeResult", ("mean"))
1437
1438            self.mean_ = pd.DataFrame(
1439                np.asarray(self.mean_),
1440                columns=self.series_names,  # self.df_.columns,
1441                index=self.output_dates_,
1442            )
1443
1444            res = DescribeResult(self.mean_)
1445
1446            if self.xreg_ is not None:
1447                if len(self.xreg_.shape) > 1:
1448                    res2 = mx.tuple_map(
1449                        res,
1450                        lambda x: mo.delete_last_columns(
1451                            x, num_columns=self.xreg_.shape[1]
1452                        ),
1453                    )
1454                else:
1455                    res2 = mx.tuple_map(
1456                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1457                    )
1458                return DescribeResult(res2[0])
1459
1460            return res
1461
1462        # After prediction loop, ensure sims only contain target columns
1463        if self.sims_ is not None:
1464            if self.verbose == 1:
1465                self.sims_ = tuple(
1466                    sim[:h,]  # Only keep target columns and h rows
1467                    for sim in tqdm(self.sims_)
1468                )
1469            elif self.verbose == 0:
1470                self.sims_ = tuple(
1471                    sim[:h,]  # Only keep target columns and h rows
1472                    for sim in self.sims_
1473                )
1474
1475            # Convert numpy arrays to DataFrames with proper columns
1476            self.sims_ = tuple(
1477                pd.DataFrame(
1478                    sim,
1479                    columns=self.df_.columns[: self.init_n_series_],
1480                    index=self.output_dates_,
1481                )
1482                for sim in self.sims_
1483            )
1484
1485        if self.type_pi in (
1486            "kde",
1487            "bootstrap",
1488            "block-bootstrap",
1489            "vine-copula",
1490        ):
1491            if self.xreg_ is not None:
1492                # Use getsimsxreg when external regressors are present
1493                target_cols = self.df_.columns[: self.init_n_series_]
1494                self.sims_ = getsimsxreg(
1495                    self.sims_, self.output_dates_, target_cols
1496                )
1497            else:
1498                # Use original getsims for backward compatibility
1499                self.sims_ = getsims(self.sims_)
1500
1501    def _crps_ensemble(self, y_true, simulations, axis=0):
1502        """
1503        Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations.
1504
1505        The CRPS is a measure of the distance between the cumulative distribution
1506        function (CDF) of a forecast and the CDF of the observed value. This method
1507        computes the CRPS in a vectorized form for an ensemble of simulations, efficiently
1508        handling the case where there is only one simulation.
1509
1510        Parameters
1511        ----------
1512        y_true : array_like, shape (n,)
1513            A 1D array of true values (observations).
1514            Each element represents the true value for a given sample.
1515
1516        simulations : array_like, shape (n, R)
1517            A 2D array of simulated values. Each row corresponds to a different sample
1518            and each column corresponds to a different simulation of that sample.
1519
1520        axis : int, optional, default=0
1521            Axis along which to transpose the simulations if needed.
1522            If axis=0, the simulations are transposed to shape (R, n).
1523
1524        Returns
1525        -------
1526        crps : ndarray, shape (n,)
1527            A 1D array of CRPS scores, one for each sample.
1528
1529        Notes
1530        -----
1531        The CRPS score is computed as:
1532
1533        CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|]
1534
1535        Where:
1536        - `X` is the ensemble of simulations.
1537        - `y` is the true value.
1538        - `X'` is a second independent sample from the ensemble.
1539
1540        The calculation is vectorized to optimize performance for large datasets.
1541
1542        The edge case where `R=1` (only one simulation) is handled by returning
1543        only `term1` (i.e., no ensemble spread).
1544        """
1545        sims = np.asarray(simulations)  # Convert simulations to numpy array
1546        if axis == 0:
1547            sims = sims.T  # Transpose if the axis is 0
1548        n, R = sims.shape  # n = number of samples, R = number of simulations
1549        # Term 1: E|X - y|, average absolute difference between simulations and true value
1550        term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1)
1551        # Handle edge case: if R == 1, return term1 (no spread in ensemble)
1552        if R == 1:
1553            return term1
1554        # Term 2: 0.5 * E|X - X'|, using efficient sorted formula
1555        sims_sorted = np.sort(sims, axis=1)  # Sort simulations along each row
1556        # Correct coefficients for efficient calculation
1557        j = np.arange(R)  # 0-indexed positions in the sorted simulations
1558        coefficients = (2 * (j + 1) - R - 1) / (
1559            R * (R - 1)
1560        )  # Efficient coefficient calculation
1561        # Dot product along the second axis (over the simulations)
1562        term2 = np.dot(sims_sorted, coefficients)
1563        # Return CRPS score: term1 - 0.5 * term2
1564        return term1 - 0.5 * term2
1565
1566    def score(
1567        self,
1568        X,
1569        training_index,
1570        testing_index,
1571        scoring=None,
1572        alpha=0.5,
1573        **kwargs,
1574    ):
1575        """Train on training_index, score on testing_index."""
1576
1577        assert (
1578            bool(set(training_index).intersection(set(testing_index))) == False
1579        ), "Non-overlapping 'training_index' and 'testing_index' required"
1580
1581        # Dimensions
1582        try:
1583            # multivariate time series
1584            n, p = X.shape
1585        except:
1586            # univariate time series
1587            n = X.shape[0]
1588            p = 1
1589
1590        # Training and testing sets
1591        if p > 1:
1592            X_train = X[training_index, :]
1593            X_test = X[testing_index, :]
1594        else:
1595            X_train = X[training_index]
1596            X_test = X[testing_index]
1597
1598        # Horizon
1599        h = len(testing_index)
1600        assert (
1601            len(training_index) + h
1602        ) <= n, "Please check lengths of training and testing windows"
1603
1604        # Fit and predict
1605        self.fit(X_train, **kwargs)
1606        preds = self.predict(h=h, **kwargs)
1607
1608        if scoring is None:
1609            scoring = "neg_root_mean_squared_error"
1610
1611        if scoring == "pinball":
1612            # Predict requested quantile
1613            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
1614            # Handle multivariate
1615            scores = []
1616            for j in range(p):
1617                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
1618                q_label = (
1619                    f"{int(alpha * 100):02d}"
1620                    if (alpha * 100).is_integer()
1621                    else f"{alpha:.3f}".replace(".", "_")
1622                )
1623                col = f"quantile_{q_label}_{series_name}"
1624                if col not in q_pred.columns:
1625                    raise ValueError(
1626                        f"Column '{col}' not found in quantile forecast output."
1627                    )
1628                y_true_j = X_test[:, j]
1629                y_pred_j = q_pred[col].values
1630                # Compute pinball loss for this series
1631                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
1632                scores.append(loss)
1633            # Return average over series
1634            return np.mean(scores)
1635
1636        if scoring == "crps":
1637            # Ensure simulations exist
1638            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
1639            # Extract simulations: list of DataFrames → (R, h, p)
1640            sims_vals = np.stack(
1641                [sim.values for sim in self.sims_], axis=0
1642            )  # (R, h, p)
1643            crps_scores = []
1644            for j in range(p):
1645                y_true_j = X_test[:, j]
1646                sims_j = sims_vals[:, :, j]  # (R, h)
1647                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
1648                crps_scores.append(np.mean(crps_j))  # average over horizon
1649            return np.mean(crps_scores)  # average over series
1650
1651        # check inputs
1652        assert scoring in (
1653            "explained_variance",
1654            "neg_mean_absolute_error",
1655            "neg_mean_squared_error",
1656            "neg_root_mean_squared_error",
1657            "neg_mean_squared_log_error",
1658            "neg_median_absolute_error",
1659            "r2",
1660        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1661                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1662                               'neg_median_absolute_error', 'r2')"
1663
1664        scoring_options = {
1665            "explained_variance": skm2.explained_variance_score,
1666            "neg_mean_absolute_error": skm2.mean_absolute_error,
1667            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1668            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
1669                np.mean((x - y) ** 2)
1670            ),
1671            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1672            "neg_median_absolute_error": skm2.median_absolute_error,
1673            "r2": skm2.r2_score,
1674        }
1675
1676        return scoring_options[scoring](X_test, preds)
1677
1678    def plot(self, series=None, type_axis="dates", type_plot="pi"):
1679        """Plot time series forecast
1680
1681        Parameters:
1682
1683        series: {integer} or {string}
1684            series index or name
1685
1686        """
1687
1688        assert all(
1689            [
1690                self.mean_ is not None,
1691                self.lower_ is not None,
1692                self.upper_ is not None,
1693                self.output_dates_ is not None,
1694            ]
1695        ), "model forecasting must be obtained first (with predict)"
1696
1697        if series is None:
1698            # assert (
1699            #    self.init_n_series_ == 1
1700            # ), "please specify series index or name (n_series > 1)"
1701            series = 0
1702
1703        if isinstance(series, str):
1704            assert (
1705                series in self.series_names
1706            ), f"series {series} doesn't exist in the input dataset"
1707            series_idx = self.df_.columns.get_loc(series)
1708        else:
1709            assert isinstance(series, int) and (
1710                0 <= series < self.n_series
1711            ), f"check series index (< {self.n_series})"
1712            series_idx = series
1713
1714        y_all = list(self.df_.iloc[:, series_idx]) + list(
1715            self.mean_.iloc[:, series_idx]
1716        )
1717        y_test = list(self.mean_.iloc[:, series_idx])
1718        n_points_all = len(y_all)
1719        n_points_train = self.df_.shape[0]
1720
1721        if type_axis == "numeric":
1722            x_all = [i for i in range(n_points_all)]
1723            x_test = [i for i in range(n_points_train, n_points_all)]
1724
1725        if type_axis == "dates":  # use dates
1726            x_all = np.concatenate(
1727                (self.input_dates.values, self.output_dates_.values), axis=None
1728            )
1729            x_test = self.output_dates_.values
1730
1731        if type_plot == "pi":
1732            fig, ax = plt.subplots()
1733            ax.plot(x_all, y_all, "-")
1734            ax.plot(x_test, y_test, "-", color="orange")
1735            ax.fill_between(
1736                x_test,
1737                self.lower_.iloc[:, series_idx],
1738                self.upper_.iloc[:, series_idx],
1739                alpha=0.2,
1740                color="orange",
1741            )
1742            if self.replications is None:
1743                if self.n_series > 1:
1744                    plt.title(
1745                        f"prediction intervals for {series}",
1746                        loc="left",
1747                        fontsize=12,
1748                        fontweight=0,
1749                        color="black",
1750                    )
1751                else:
1752                    plt.title(
1753                        f"prediction intervals for input time series",
1754                        loc="left",
1755                        fontsize=12,
1756                        fontweight=0,
1757                        color="black",
1758                    )
1759                plt.show()
1760            else:  # self.replications is not None
1761                if self.n_series > 1:
1762                    plt.title(
1763                        f"prediction intervals for {self.replications} simulations of {series}",
1764                        loc="left",
1765                        fontsize=12,
1766                        fontweight=0,
1767                        color="black",
1768                    )
1769                else:
1770                    plt.title(
1771                        f"prediction intervals for {self.replications} simulations of input time series",
1772                        loc="left",
1773                        fontsize=12,
1774                        fontweight=0,
1775                        color="black",
1776                    )
1777                plt.show()
1778
1779        if type_plot == "spaghetti":
1780            palette = plt.get_cmap("Set1")
1781            sims_ix = getsims(self.sims_, series_idx)
1782            plt.plot(x_all, y_all, "-")
1783            for col_ix in range(
1784                sims_ix.shape[1]
1785            ):  # avoid this when there are thousands of simulations
1786                plt.plot(
1787                    x_test,
1788                    sims_ix[:, col_ix],
1789                    "-",
1790                    color=palette(col_ix),
1791                    linewidth=1,
1792                    alpha=0.9,
1793                )
1794            plt.plot(x_all, y_all, "-", color="black")
1795            plt.plot(x_test, y_test, "-", color="blue")
1796            # Add titles
1797            if self.n_series > 1:
1798                plt.title(
1799                    f"{self.replications} simulations of {series}",
1800                    loc="left",
1801                    fontsize=12,
1802                    fontweight=0,
1803                    color="black",
1804                )
1805            else:
1806                plt.title(
1807                    f"{self.replications} simulations of input time series",
1808                    loc="left",
1809                    fontsize=12,
1810                    fontweight=0,
1811                    color="black",
1812                )
1813            plt.xlabel("Time")
1814            plt.ylabel("Values")
1815            # Show the graph
1816            plt.show()
1817
1818    def cross_val_score(
1819        self,
1820        X,
1821        scoring="root_mean_squared_error",
1822        n_jobs=None,
1823        verbose=0,
1824        xreg=None,
1825        initial_window=5,
1826        horizon=3,
1827        fixed_window=False,
1828        show_progress=True,
1829        level=95,
1830        alpha=0.5,
1831        **kwargs,
1832    ):
1833        """Evaluate a score by time series cross-validation.
1834
1835        Parameters:
1836
1837            X: {array-like, sparse matrix} of shape (n_samples, n_features)
1838                The data to fit.
1839
1840            scoring: str or a function
1841                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
1842                'mean_absolute_error', 'mean_error', 'mean_percentage_error',
1843                'mean_absolute_percentage_error',  'winkler_score', 'coverage')
1844                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`
1845
1846            n_jobs: int, default=None
1847                Number of jobs to run in parallel.
1848
1849            verbose: int, default=0
1850                The verbosity level.
1851
1852            xreg: array-like, optional (default=None)
1853                Additional (external) regressors to be passed to `fit`
1854                xreg must be in 'increasing' order (most recent observations last)
1855
1856            initial_window: int
1857                initial number of consecutive values in each training set sample
1858
1859            horizon: int
1860                number of consecutive values in test set sample
1861
1862            fixed_window: boolean
1863                if False, all training samples start at index 0, and the training
1864                window's size is increasing.
1865                if True, the training window's size is fixed, and the window is
1866                rolling forward
1867
1868            show_progress: boolean
1869                if True, a progress bar is printed
1870
1871            level: int
1872                confidence level for prediction intervals
1873
1874            alpha: float
1875                quantile level for pinball loss if scoring='pinball'
1876                0 < alpha < 1
1877
1878            **kwargs: dict
1879                additional parameters to be passed to `fit` and `predict`
1880
1881        Returns:
1882
1883            A tuple: descriptive statistics or errors and raw errors
1884
1885        """
1886        tscv = TimeSeriesSplit()
1887
1888        tscv_obj = tscv.split(
1889            X,
1890            initial_window=initial_window,
1891            horizon=horizon,
1892            fixed_window=fixed_window,
1893        )
1894
1895        if isinstance(scoring, str):
1896            assert scoring in (
1897                "pinball",
1898                "crps",
1899                "root_mean_squared_error",
1900                "mean_squared_error",
1901                "mean_error",
1902                "mean_absolute_error",
1903                "mean_percentage_error",
1904                "mean_absolute_percentage_error",
1905                "winkler_score",
1906                "coverage",
1907            ), "must have scoring in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error',  'winkler_score', 'coverage')"
1908
1909            def err_func(X_test, X_pred, scoring, alpha=0.5):
1910                if (self.replications is not None) or (
1911                    self.type_pi == "gaussian"
1912                ):  # probabilistic
1913                    if scoring == "pinball":
1914                        # Predict requested quantile
1915                        q_pred = self.predict(
1916                            h=len(X_test), quantiles=[alpha], **kwargs
1917                        )
1918                        # Handle multivariate
1919                        scores = []
1920                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
1921                        for j in range(p):
1922                            series_name = getattr(
1923                                self, "series_names", [f"Series_{j}"]
1924                            )[j]
1925                            q_label = (
1926                                f"{int(alpha * 100):02d}"
1927                                if (alpha * 100).is_integer()
1928                                else f"{alpha:.3f}".replace(".", "_")
1929                            )
1930                            col = f"quantile_{q_label}_{series_name}"
1931                            if col not in q_pred.columns:
1932                                raise ValueError(
1933                                    f"Column '{col}' not found in quantile forecast output."
1934                                )
1935                            try:
1936                                y_true_j = X_test[:, j] if p > 1 else X_test
1937                            except:
1938                                y_true_j = (
1939                                    X_test.iloc[:, j]
1940                                    if p > 1
1941                                    else X_test.values
1942                                )
1943                            y_pred_j = q_pred[col].values
1944                            # Compute pinball loss for this series
1945                            loss = mean_pinball_loss(
1946                                y_true_j, y_pred_j, alpha=alpha
1947                            )
1948                            scores.append(loss)
1949                        # Return average over series
1950                        return np.mean(scores)
1951                    elif scoring == "crps":
1952                        # Ensure simulations exist
1953                        _ = self.predict(
1954                            h=len(X_test), **kwargs
1955                        )  # triggers self.sims_
1956                        # Extract simulations: list of DataFrames → (R, h, p)
1957                        sims_vals = np.stack(
1958                            [sim.values for sim in self.sims_], axis=0
1959                        )  # (R, h, p)
1960                        crps_scores = []
1961                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
1962                        for j in range(p):
1963                            try:
1964                                y_true_j = X_test[:, j] if p > 1 else X_test
1965                            except Exception as e:
1966                                y_true_j = (
1967                                    X_test.iloc[:, j]
1968                                    if p > 1
1969                                    else X_test.values
1970                                )
1971                            sims_j = sims_vals[:, :, j]  # (R, h)
1972                            crps_j = self._crps_ensemble(
1973                                np.asarray(y_true_j), sims_j
1974                            )
1975                            crps_scores.append(
1976                                np.mean(crps_j)
1977                            )  # average over horizon
1978                        return np.mean(crps_scores)  # average over series
1979                    if scoring == "winkler_score":
1980                        return winkler_score(X_pred, X_test, level=level)
1981                    elif scoring == "coverage":
1982                        return coverage(X_pred, X_test, level=level)
1983                    else:
1984                        return mean_errors(
1985                            pred=X_pred.mean, actual=X_test, scoring=scoring
1986                        )
1987                else:  # not probabilistic
1988                    return mean_errors(
1989                        pred=X_pred, actual=X_test, scoring=scoring
1990                    )
1991
1992        else:  # isinstance(scoring, str) = False
1993            err_func = scoring
1994
1995        errors = []
1996
1997        train_indices = []
1998
1999        test_indices = []
2000
2001        for train_index, test_index in tscv_obj:
2002            train_indices.append(train_index)
2003            test_indices.append(test_index)
2004
2005        if show_progress is True:
2006            iterator = tqdm(
2007                zip(train_indices, test_indices), total=len(train_indices)
2008            )
2009        else:
2010            iterator = zip(train_indices, test_indices)
2011
2012        for train_index, test_index in iterator:
2013            if verbose == 1:
2014                print(f"TRAIN: {train_index}")
2015                print(f"TEST: {test_index}")
2016
2017            if isinstance(X, pd.DataFrame):
2018                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
2019                X_test = X.iloc[test_index, :]
2020            else:
2021                self.fit(X[train_index, :], xreg=xreg, **kwargs)
2022                X_test = X[test_index, :]
2023            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)
2024
2025            errors.append(err_func(X_test, X_pred, scoring, alpha=alpha))
2026
2027        res = np.asarray(errors)
2028
2029        return res, describe(res)
2030
2031    def _compute_information_criterion(self, curr_lags, criterion="AIC"):
2032        """Compute information criterion using existing residuals
2033
2034        Parameters
2035        ----------
2036        curr_lags : int
2037            Current number of lags being evaluated
2038        criterion : str
2039            One of 'AIC', 'AICc', or 'BIC'
2040
2041        Returns
2042        -------
2043        float
2044            Information criterion value or inf if parameters exceed observations
2045        """
2046        # Get dimensions
2047        n_obs = self.residuals_.shape[0]
2048        n_features = int(self.init_n_series_ * curr_lags)
2049        n_hidden = int(self.n_hidden_features)
2050        # Calculate number of parameters
2051        term1 = int(n_features * n_hidden)
2052        term2 = int(n_hidden * self.init_n_series_)
2053        n_params = term1 + term2
2054        # Check if we have enough observations for the number of parameters
2055        if n_obs <= n_params + 1:
2056            return float("inf")  # Return infinity if too many parameters
2057        # Compute RSS using existing residuals
2058        rss = np.sum(self.residuals_**2)
2059        # Compute criterion
2060        if criterion == "AIC":
2061            ic = n_obs * np.log(rss / n_obs) + 2 * n_params
2062        elif criterion == "AICc":
2063            ic = n_obs * np.log(rss / n_obs) + 2 * n_params * (
2064                n_obs / (n_obs - n_params - 1)
2065            )
2066        else:  # BIC
2067            ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs)
2068
2069        return ic

Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks

Parameters:

obj: object.
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict()).

n_hidden_features: int.
    number of nodes in the hidden layer.

activation_name: str.
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

a: float.
    hyperparameter for 'prelu' or 'elu' activation function.

nodes_sim: str.
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'.

bias: boolean.
    indicates if the hidden layer contains a bias term (True) or not
    (False).

dropout: float.
    regularization parameter; (random) percentage of nodes dropped out
    of the training.

direct_link: boolean.
    indicates if the original predictors are included (True) in model's fitting or not (False).

n_clusters: int.
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

cluster_encode: bool.
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding.

type_clust: str.
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm').

type_scaling: a tuple of 3 strings.
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax').

lags: int.
    number of lags used for each time series.
    If string, lags must be one of 'AIC', 'AICc', or 'BIC'.

type_pi: str.
    type of prediction interval; currently:
    - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
    - "quantile": use model-agnostic quantile regression under the hood
    - "kde": based on Kernel Density Estimation of in-sample residuals
    - "bootstrap": based on independent bootstrap of in-sample residuals
    - "block-bootstrap": based on basic block bootstrap of in-sample residuals
    - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
    - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
    - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
    - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
    - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
    - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
    - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
    'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
    - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
    'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
    - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
    'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'

level: int.
    level of confidence for `type_pi == 'quantile'` (default is `95`)

block_size: int.
    size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
    Default is round(3.15 * n_residuals^(1/3)), i.e. 3.15 times the cube root of the number of residuals

replications: int.
    number of replications (if needed, for predictive simulation). Default is 'None'.

kernel: str.
    the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

agg: str.
    either "mean" or "median" for simulation of bootstrap aggregating

seed: int.
    reproducibility seed for nodes_sim=='uniform' or predictive simulation.

backend: str.
    "cpu" or "gpu" or "tpu".

verbose: int.
    0: not printing; 1: printing

show_progress: bool.
    True: progress bar when fitting each series; False: no progress bar when fitting each series

Attributes:

fit_objs_: dict
    objects adjusted to each individual time series

y_: {array-like}
    MTS responses (most recent observations first)

X_: {array-like}
    MTS lags

xreg_: {array-like}
    external regressors

y_means_: dict
    a dictionary of each series mean values

preds_: {array-like}
    successive model predictions

preds_std_: {array-like}
    standard deviation around the predictions for Bayesian base learners (`obj`)

gaussian_preds_std_: {array-like}
    standard deviation around the predictions for `type_pi='gaussian'`

return_std_: boolean
    return uncertainty or not (set in predict)

df_: data frame
    the input data frame, in case a data.frame is provided to `fit`

n_obs_: int
    number of time series observations (number of rows for multivariate)

level_: int
    level of confidence for prediction intervals (default is 95)

residuals_: {array-like}
    in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
    (for `type_pi` in conformal prediction)

residuals_sims_: tuple of {array-like}
    simulations of in-sample residuals (for `type_pi` not conformal prediction) or
    calibrated residuals (for `type_pi` in conformal prediction)

kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html

residuals_std_dev_: residuals standard deviation

Examples:

Example 1:

import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)

M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)

# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))

Example 2:

import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model

dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)

# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())

# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))

print(obj_MTS.predict(return_std=True, level=95))
def fit(self, X, xreg=None, **kwargs):
337    def fit(self, X, xreg=None, **kwargs):
338        """Fit MTS model to training data X, with optional regressors xreg
339
340        Parameters:
341
342        X: {array-like}, shape = [n_samples, n_features]
343            Training time series, where n_samples is the number
344            of samples and n_features is the number of features;
345            X must be in increasing order (most recent observations last)
346
347        xreg: {array-like}, shape = [n_samples, n_features_xreg]
348            Additional (external) regressors to be passed to self.obj
349            xreg must be in 'increasing' order (most recent observations last)
350
351        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
352
353        Returns:
354
355        self: object
356        """
357        try:
358            self.init_n_series_ = X.shape[1]
359        except IndexError as e:
360            self.init_n_series_ = 1
361
362        # Automatic lag selection if requested
363        if isinstance(self.lags, str):
364            max_lags = min(25, X.shape[0] // 4)
365            best_ic = float("inf")
366            best_lags = 1
367
368            if self.verbose:
369                print(
370                    f"\nSelecting optimal number of lags using {self.lags}..."
371                )
372                iterator = tqdm(range(1, max_lags + 1))
373            else:
374                iterator = range(1, max_lags + 1)
375
376            for lag in iterator:
377                # Convert DataFrame to numpy array before reversing
378                if isinstance(X, pd.DataFrame):
379                    X_values = X.values[::-1]
380                else:
381                    X_values = X[::-1]
382
383                # Try current lag value
384                if self.init_n_series_ > 1:
385                    mts_input = ts.create_train_inputs(X_values, lag)
386                else:
387                    mts_input = ts.create_train_inputs(
388                        X_values.reshape(-1, 1), lag
389                    )
390
391                # Cook training set and fit model
392                dummy_y, scaled_Z = self.cook_training_set(
393                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
394                )
395                residuals_ = []
396
397                for i in range(self.init_n_series_):
398                    y_mean = np.mean(mts_input[0][:, i])
399                    centered_y_i = mts_input[0][:, i] - y_mean
400                    self.obj.fit(X=scaled_Z, y=centered_y_i)
401                    residuals_.append(
402                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
403                    )
404
405                self.residuals_ = np.asarray(residuals_).T
406                ic = self._compute_information_criterion(
407                    curr_lags=lag, criterion=self.lags
408                )
409
410                if self.verbose:
411                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
412
413                if ic < best_ic:
414                    best_ic = ic
415                    best_lags = lag
416
417            if self.verbose:
418                print(
419                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
420                )
421
422            self.lags = best_lags
423
424        self.input_dates = None
425        self.df_ = None
426
427        if isinstance(X, pd.DataFrame) is False:
428            # input data set is a numpy array
429            if xreg is None:
430                X = pd.DataFrame(X)
431                self.series_names = [
432                    "series" + str(i) for i in range(X.shape[1])
433                ]
434            else:
435                # xreg is not None
436                X = mo.cbind(X, xreg)
437                self.xreg_ = xreg
438
439        else:  # input data set is a DataFrame with column names
440            X_index = None
441            if X.index is not None:
442                X_index = X.index
443            if xreg is None:
444                X = copy.deepcopy(mo.convert_df_to_numeric(X))
445            else:
446                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
447                self.xreg_ = xreg
448            if X_index is not None:
449                X.index = X_index
450            self.series_names = X.columns.tolist()
451
452        if isinstance(X, pd.DataFrame):
453            if self.df_ is None:
454                self.df_ = X
455                X = X.values
456            else:
457                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
458                frequency = pd.infer_freq(input_dates_prev)
459                self.df_ = pd.concat([self.df_, X], axis=0)
460                self.input_dates = pd.date_range(
461                    start=input_dates_prev[0],
462                    periods=len(input_dates_prev) + X.shape[0],
463                    freq=frequency,
464                ).values.tolist()
465                self.df_.index = self.input_dates
466                X = self.df_.values
467            self.df_.columns = self.series_names
468        else:
469            if self.df_ is None:
470                self.df_ = pd.DataFrame(X, columns=self.series_names)
471            else:
472                self.df_ = pd.concat(
473                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
474                    axis=0,
475                )
476
477        self.input_dates = ts.compute_input_dates(self.df_)
478
479        try:
480            # multivariate time series
481            n, p = X.shape
482        except:
483            # univariate time series
484            n = X.shape[0]
485            p = 1
486        self.n_obs_ = n
487
488        rep_1_n = np.repeat(1, n)
489
490        self.y_ = None
491        self.X_ = None
492        self.n_series = p
493        self.fit_objs_.clear()
494        self.y_means_.clear()
495        residuals_ = []
496        self.residuals_ = None
497        self.residuals_sims_ = None
498        self.kde_ = None
499        self.sims_ = None
500        self.scaled_Z_ = None
501        self.centered_y_is_ = []
502
503        if self.init_n_series_ > 1:
504            # multivariate time series
505            mts_input = ts.create_train_inputs(X[::-1], self.lags)
506        else:
507            # univariate time series
508            mts_input = ts.create_train_inputs(
509                X.reshape(-1, 1)[::-1], self.lags
510            )
511
512        self.y_ = mts_input[0]
513
514        self.X_ = mts_input[1]
515
516        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
517
518        self.scaled_Z_ = scaled_Z
519
520        # loop on all the time series and adjust self.obj.fit
521        if self.verbose > 0:
522            print(
523                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
524            )
525
526        if self.show_progress is True:
527            iterator = tqdm(range(self.init_n_series_))
528        else:
529            iterator = range(self.init_n_series_)
530
531        if self.type_pi in (
532            "gaussian",
533            "kde",
534            "bootstrap",
535            "block-bootstrap",
536        ) or self.type_pi.startswith("vine"):
537            for i in iterator:
538                y_mean = np.mean(self.y_[:, i])
539                self.y_means_[i] = y_mean
540                centered_y_i = self.y_[:, i] - y_mean
541                self.centered_y_is_.append(centered_y_i)
542                self.obj.fit(X=scaled_Z, y=centered_y_i)
543                self.fit_objs_[i] = deepcopy(self.obj)
544                residuals_.append(
545                    (
546                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
547                    ).tolist()
548                )
549
550        if self.type_pi == "quantile":
551            for i in iterator:
552                y_mean = np.mean(self.y_[:, i])
553                self.y_means_[i] = y_mean
554                centered_y_i = self.y_[:, i] - y_mean
555                self.centered_y_is_.append(centered_y_i)
556                self.obj.fit(X=scaled_Z, y=centered_y_i)
557                self.fit_objs_[i] = deepcopy(self.obj)
558
559        if self.type_pi.startswith("scp"):
560            # split conformal prediction
561            for i in iterator:
562                n_y = self.y_.shape[0]
563                n_y_half = n_y // 2
564                first_half_idx = range(0, n_y_half)
565                second_half_idx = range(n_y_half, n_y)
566                y_mean_temp = np.mean(self.y_[first_half_idx, i])
567                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
568                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
569                # calibrated residuals actually
570                residuals_.append(
571                    (
572                        self.y_[second_half_idx, i]
573                        - (
574                            y_mean_temp
575                            + self.obj.predict(scaled_Z[second_half_idx, :])
576                        )
577                    ).tolist()
578                )
579                # fit on the second half
580                y_mean = np.mean(self.y_[second_half_idx, i])
581                self.y_means_[i] = y_mean
582                centered_y_i = self.y_[second_half_idx, i] - y_mean
583                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
584                self.fit_objs_[i] = deepcopy(self.obj)
585
586        self.residuals_ = np.asarray(residuals_).T
587
588        if self.type_pi == "gaussian":
589            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
590
591        if self.type_pi.startswith("scp2"):
592            # Calculate mean and standard deviation for each column
593            data_mean = np.mean(self.residuals_, axis=0)
594            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
595            # Center and scale the array using broadcasting
596            self.residuals_ = (
597                self.residuals_ - data_mean[np.newaxis, :]
598            ) / self.residuals_std_dev_[np.newaxis, :]
599
600        if self.replications != None and "kde" in self.type_pi:
601            if self.verbose > 0:
602                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
603            assert self.kernel in (
604                "gaussian",
605                "tophat",
606            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
607            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
608            grid = GridSearchCV(
609                KernelDensity(kernel=self.kernel, **kwargs),
610                param_grid=kernel_bandwidths,
611            )
612            grid.fit(self.residuals_)
613
614            if self.verbose > 0:
615                print(
616                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
617                )
618
619            self.kde_ = grid.best_estimator_
620
621        return self

Fit MTS model to training data X, with optional regressors xreg

Parameters:

X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)

xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj; xreg must be in 'increasing' order (most recent observations last)

**kwargs: additional parameters passed to the kernel density estimator, when needed (see sklearn.neighbors.KernelDensity)

Returns:

self: object

def predict(self, h=5, level=95, quantiles=None, **kwargs):
 953    def predict(self, h=5, level=95, quantiles=None, **kwargs):
 954        """Forecast all the time series, h steps ahead"""
 955
 956        if quantiles is not None:
 957            # Validate
 958            quantiles = np.asarray(quantiles)
 959            if not ((quantiles > 0) & (quantiles < 1)).all():
 960                raise ValueError("quantiles must be between 0 and 1.")
 961            # Delegate to dedicated method
 962            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)
 963
 964        if isinstance(level, list) or isinstance(level, np.ndarray):
 965            # Store results
 966            result_dict = {}
 967            # Loop through alphas and calculate lower/upper for each alpha level
 968            # E.g [0.5, 2.5, 5, 16.5, 25, 50]
 969            for lev in level:
 970                # Get the forecast for this alpha
 971                res = self.predict(h=h, level=lev, **kwargs)
 972                # Adjust index and collect lower/upper bounds
 973                res.lower.index = pd.to_datetime(res.lower.index)
 974                res.upper.index = pd.to_datetime(res.upper.index)
 975                # Loop over each time series (multivariate) and flatten results
 976                if isinstance(res.lower, pd.DataFrame):
 977                    for (
 978                        series
 979                    ) in (
 980                        res.lower.columns
 981                    ):  # Assumes 'lower' and 'upper' have multiple series
 982                        result_dict[f"lower_{lev}_{series}"] = (
 983                            res.lower[series].to_numpy().flatten()
 984                        )
 985                        result_dict[f"upper_{lev}_{series}"] = (
 986                            res.upper[series].to_numpy().flatten()
 987                        )
 988                else:
 989                    for series_id in range(
 990                        self.n_series
 991                    ):  # Assumes 'lower' and 'upper' have multiple series
 992                        result_dict[f"lower_{lev}_{series_id}"] = (
 993                            res.lower[series_id, :].to_numpy().flatten()
 994                        )
 995                        result_dict[f"upper_{lev}_{series_id}"] = (
 996                            res.upper[series_id, :].to_numpy().flatten()
 997                        )
 998            return pd.DataFrame(result_dict, index=self.output_dates_)
 999
1000        # only one prediction interval
1001        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
1002
1003        self.level_ = level
1004
1005        self.return_std_ = False  # do not remove (/!\)
1006
1007        self.mean_ = None  # do not remove (/!\)
1008
1009        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)
1010
1011        self.lower_ = None  # do not remove (/!\)
1012
1013        self.upper_ = None  # do not remove (/!\)
1014
1015        self.sims_ = None  # do not remove (/!\)
1016
1017        y_means_ = np.asarray(
1018            [self.y_means_[i] for i in range(self.init_n_series_)]
1019        )
1020
1021        n_features = self.init_n_series_ * self.lags
1022
1023        self.alpha_ = 100 - level
1024
1025        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)
1026
1027        if "return_std" in kwargs:  # bayesian forecasting
1028            self.return_std_ = True
1029            self.preds_std_ = []
1030            DescribeResult = namedtuple(
1031                "DescribeResult", ("mean", "lower", "upper")
1032            )  # to be updated
1033
1034        if "return_pi" in kwargs:  # split conformal, without simulation
1035            mean_pi_ = []
1036            lower_pi_ = []
1037            upper_pi_ = []
1038            median_pi_ = []
1039            DescribeResult = namedtuple(
1040                "DescribeResult", ("mean", "lower", "upper")
1041            )  # to be updated
1042
1043        if self.kde_ != None and "kde" in self.type_pi:  # kde
1044            target_cols = self.df_.columns[
1045                : self.init_n_series_
1046            ]  # Get target column names
1047            if self.verbose == 1:
1048                self.residuals_sims_ = tuple(
1049                    self.kde_.sample(
1050                        n_samples=h, random_state=self.seed + 100 * i
1051                    )  # Keep full sample
1052                    for i in tqdm(range(self.replications))
1053                )
1054            elif self.verbose == 0:
1055                self.residuals_sims_ = tuple(
1056                    self.kde_.sample(
1057                        n_samples=h, random_state=self.seed + 100 * i
1058                    )  # Keep full sample
1059                    for i in range(self.replications)
1060                )
1061
1062            # Convert to DataFrames after sampling
1063            self.residuals_sims_ = tuple(
1064                pd.DataFrame(
1065                    sim,  # Keep all columns
1066                    columns=target_cols,  # Use original target column names
1067                    index=self.output_dates_,
1068                )
1069                for sim in self.residuals_sims_
1070            )
1071
1072        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
1073            assert self.replications is not None and isinstance(
1074                self.replications, int
1075            ), "'replications' must be provided and be an integer"
1076            if self.verbose == 1:
1077                self.residuals_sims_ = tuple(
1078                    ts.bootstrap(
1079                        self.residuals_,
1080                        h=h,
1081                        block_size=None,
1082                        seed=self.seed + 100 * i,
1083                    )
1084                    for i in tqdm(range(self.replications))
1085                )
1086            elif self.verbose == 0:
1087                self.residuals_sims_ = tuple(
1088                    ts.bootstrap(
1089                        self.residuals_,
1090                        h=h,
1091                        block_size=None,
1092                        seed=self.seed + 100 * i,
1093                    )
1094                    for i in range(self.replications)
1095                )
1096
1097        if self.type_pi in (
1098            "block-bootstrap",
1099            "scp-block-bootstrap",
1100            "scp2-block-bootstrap",
1101        ):
1102            if self.block_size is None:
1103                self.block_size = int(
1104                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
1105                )
1106
1107            assert self.replications is not None and isinstance(
1108                self.replications, int
1109            ), "'replications' must be provided and be an integer"
1110            if self.verbose == 1:
1111                self.residuals_sims_ = tuple(
1112                    ts.bootstrap(
1113                        self.residuals_,
1114                        h=h,
1115                        block_size=self.block_size,
1116                        seed=self.seed + 100 * i,
1117                    )
1118                    for i in tqdm(range(self.replications))
1119                )
1120            elif self.verbose == 0:
1121                self.residuals_sims_ = tuple(
1122                    ts.bootstrap(
1123                        self.residuals_,
1124                        h=h,
1125                        block_size=self.block_size,
1126                        seed=self.seed + 100 * i,
1127                    )
1128                    for i in range(self.replications)
1129                )
1130
1131        if "vine" in self.type_pi:
1132            if self.verbose == 1:
1133                self.residuals_sims_ = tuple(
1134                    vinecopula_sample(
1135                        x=self.residuals_,
1136                        n_samples=h,
1137                        method=self.type_pi,
1138                        random_state=self.seed + 100 * i,
1139                    )
1140                    for i in tqdm(range(self.replications))
1141                )
1142            elif self.verbose == 0:
1143                self.residuals_sims_ = tuple(
1144                    vinecopula_sample(
1145                        x=self.residuals_,
1146                        n_samples=h,
1147                        method=self.type_pi,
1148                        random_state=self.seed + 100 * i,
1149                    )
1150                    for i in range(self.replications)
1151                )
1152
1153        mean_ = deepcopy(self.mean_)
1154
1155        for i in range(h):
1156            new_obs = ts.reformat_response(mean_, self.lags)
1157            new_X = new_obs.reshape(1, -1)
1158            cooked_new_X = self.cook_test_set(new_X, **kwargs)
1159
1160            if "return_std" in kwargs:
1161                self.preds_std_.append(
1162                    [
1163                        np.asarray(
1164                            self.fit_objs_[i].predict(
1165                                cooked_new_X, return_std=True
1166                            )[1]
1167                        ).item()
1168                        for i in range(self.n_series)
1169                    ]
1170                )
1171
1172            if "return_pi" in kwargs:
1173                for i in range(self.n_series):
1174                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
1175                    mean_pi_.append(preds_pi.mean[0])
1176                    lower_pi_.append(preds_pi.lower[0])
1177                    upper_pi_.append(preds_pi.upper[0])
1178
1179            if self.type_pi != "quantile":
1180                predicted_cooked_new_X = np.asarray(
1181                    [
1182                        np.asarray(
1183                            self.fit_objs_[i].predict(cooked_new_X)
1184                        ).item()
1185                        for i in range(self.init_n_series_)
1186                    ]
1187                )
1188            else:
1189                predicted_cooked_new_X = np.asarray(
1190                    [
1191                        np.asarray(
1192                            self.fit_objs_[i]
1193                            .predict(cooked_new_X, return_pi=True)
1194                            .upper
1195                        ).item()
1196                        for i in range(self.init_n_series_)
1197                    ]
1198                )
1199
1200            preds = np.asarray(y_means_ + predicted_cooked_new_X)
1201
1202            # Create full row with both predictions and external regressors
1203            if self.xreg_ is not None and "xreg" in kwargs:
1204                next_xreg = kwargs["xreg"].iloc[i: i + 1].values.flatten()
1205                full_row = np.concatenate([preds, next_xreg])
1206            else:
1207                full_row = preds
1208
1209            # Create a new row with same number of columns as mean_
1210            new_row = np.zeros((1, mean_.shape[1]))
1211            new_row[0, : full_row.shape[0]] = full_row
1212
1213            # Maintain the full dimensionality by using vstack instead of rbind
1214            mean_ = np.vstack([new_row, mean_[:-1]])
1215
1216        # Final output should only include the target columns
1217        self.mean_ = pd.DataFrame(
1218            mean_[0: min(h, self.n_obs_ - self.lags), : self.init_n_series_][
1219                ::-1
1220            ],
1221            columns=self.df_.columns[: self.init_n_series_],
1222            index=self.output_dates_,
1223        )
1224
1225        # function's return ----------------------------------------------------------------------
1226        if (
1227            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
1228            and (self.type_pi not in ("gaussian", "scp"))
1229        ) or ("vine" in self.type_pi):
1230            if self.replications is None:
1231                return self.mean_.iloc[:, : self.init_n_series_]
1232
1233            # if "return_std" not in kwargs and self.replications is not None
1234            meanf = []
1235            medianf = []
1236            lower = []
1237            upper = []
1238
1239            if "scp2" in self.type_pi:
1240                if self.verbose == 1:
1241                    self.sims_ = tuple(
1242                        (
1243                            self.mean_
1244                            + self.residuals_sims_[i]
1245                            * self.residuals_std_dev_[np.newaxis, :]
1246                            for i in tqdm(range(self.replications))
1247                        )
1248                    )
1249                elif self.verbose == 0:
1250                    self.sims_ = tuple(
1251                        (
1252                            self.mean_
1253                            + self.residuals_sims_[i]
1254                            * self.residuals_std_dev_[np.newaxis, :]
1255                            for i in range(self.replications)
1256                        )
1257                    )
1258            else:
1259                if self.verbose == 1:
1260                    self.sims_ = tuple(
1261                        (
1262                            self.mean_ + self.residuals_sims_[i]
1263                            for i in tqdm(range(self.replications))
1264                        )
1265                    )
1266                elif self.verbose == 0:
1267                    self.sims_ = tuple(
1268                        (
1269                            self.mean_ + self.residuals_sims_[i]
1270                            for i in range(self.replications)
1271                        )
1272                    )
1273
1274            DescribeResult = namedtuple(
1275                "DescribeResult", ("mean", "sims", "lower", "upper")
1276            )
1277            for ix in range(self.init_n_series_):
1278                sims_ix = getsims(self.sims_, ix)
1279                if self.agg == "mean":
1280                    meanf.append(np.mean(sims_ix, axis=1))
1281                else:
1282                    medianf.append(np.median(sims_ix, axis=1))
1283                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
1284                upper.append(
1285                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
1286                )
1287            self.mean_ = pd.DataFrame(
1288                np.asarray(meanf).T,
1289                columns=self.series_names[
1290                    : self.init_n_series_
1291                ],  # self.df_.columns,
1292                index=self.output_dates_,
1293            )
1294
1295            self.lower_ = pd.DataFrame(
1296                np.asarray(lower).T,
1297                columns=self.series_names[
1298                    : self.init_n_series_
1299                ],  # self.df_.columns,
1300                index=self.output_dates_,
1301            )
1302
1303            self.upper_ = pd.DataFrame(
1304                np.asarray(upper).T,
1305                columns=self.series_names[
1306                    : self.init_n_series_
1307                ],  # self.df_.columns,
1308                index=self.output_dates_,
1309            )
1310
1311            try:
1312                self.median_ = pd.DataFrame(
1313                    np.asarray(medianf).T,
1314                    columns=self.series_names[
1315                        : self.init_n_series_
1316                    ],  # self.df_.columns,
1317                    index=self.output_dates_,
1318                )
1319            except Exception as e:
1320                pass
1321
1322            return DescribeResult(
1323                self.mean_, self.sims_, self.lower_, self.upper_
1324            )
1325
1326        if (
1327            (("return_std" in kwargs) or ("return_pi" in kwargs))
1328            and (self.type_pi not in ("gaussian", "scp"))
1329        ) or "vine" in self.type_pi:
1330            DescribeResult = namedtuple(
1331                "DescribeResult", ("mean", "lower", "upper")
1332            )
1333
1334            self.mean_ = pd.DataFrame(
1335                np.asarray(self.mean_),
1336                columns=self.series_names,  # self.df_.columns,
1337                index=self.output_dates_,
1338            )
1339
1340            if "return_std" in kwargs:
1341                self.preds_std_ = np.asarray(self.preds_std_)
1342
1343                self.lower_ = pd.DataFrame(
1344                    self.mean_.values - pi_multiplier * self.preds_std_,
1345                    columns=self.series_names,  # self.df_.columns,
1346                    index=self.output_dates_,
1347                )
1348
1349                self.upper_ = pd.DataFrame(
1350                    self.mean_.values + pi_multiplier * self.preds_std_,
1351                    columns=self.series_names,  # self.df_.columns,
1352                    index=self.output_dates_,
1353                )
1354
1355            if "return_pi" in kwargs:
1356                self.lower_ = pd.DataFrame(
1357                    np.asarray(lower_pi_).reshape(h, self.n_series)
1358                    + y_means_[np.newaxis, :],
1359                    columns=self.series_names,  # self.df_.columns,
1360                    index=self.output_dates_,
1361                )
1362
1363                self.upper_ = pd.DataFrame(
1364                    np.asarray(upper_pi_).reshape(h, self.n_series)
1365                    + y_means_[np.newaxis, :],
1366                    columns=self.series_names,  # self.df_.columns,
1367                    index=self.output_dates_,
1368                )
1369
1370            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1371
1372            if self.xreg_ is not None:
1373                if len(self.xreg_.shape) > 1:
1374                    res2 = mx.tuple_map(
1375                        res,
1376                        lambda x: mo.delete_last_columns(
1377                            x, num_columns=self.xreg_.shape[1]
1378                        ),
1379                    )
1380                else:
1381                    res2 = mx.tuple_map(
1382                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1383                    )
1384                return DescribeResult(res2[0], res2[1], res2[2])
1385
1386            return res
1387
1388        if self.type_pi == "gaussian":
1389            DescribeResult = namedtuple(
1390                "DescribeResult", ("mean", "lower", "upper")
1391            )
1392
1393            self.mean_ = pd.DataFrame(
1394                np.asarray(self.mean_),
1395                columns=self.series_names,  # self.df_.columns,
1396                index=self.output_dates_,
1397            )
1398
1399            # Use Bayesian std if available, otherwise use gaussian residual std
1400            if "return_std" in kwargs and len(self.preds_std_) > 0:
1401                preds_std_to_use = np.asarray(self.preds_std_)
1402            else:
1403                preds_std_to_use = self.gaussian_preds_std_
1404
1405            self.lower_ = pd.DataFrame(
1406                self.mean_.values - pi_multiplier * preds_std_to_use,
1407                columns=self.series_names,  # self.df_.columns,
1408                index=self.output_dates_,
1409            )
1410
1411            self.upper_ = pd.DataFrame(
1412                self.mean_.values + pi_multiplier * preds_std_to_use,
1413                columns=self.series_names,  # self.df_.columns,
1414                index=self.output_dates_,
1415            )
1416
1417            res = DescribeResult(self.mean_, self.lower_, self.upper_)
1418
1419            if self.xreg_ is not None:
1420                if len(self.xreg_.shape) > 1:
1421                    res2 = mx.tuple_map(
1422                        res,
1423                        lambda x: mo.delete_last_columns(
1424                            x, num_columns=self.xreg_.shape[1]
1425                        ),
1426                    )
1427                else:
1428                    res2 = mx.tuple_map(
1429                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1430                    )
1431                return DescribeResult(res2[0], res2[1], res2[2])
1432
1433            return res
1434
1435        if self.type_pi == "quantile":
1436            DescribeResult = namedtuple("DescribeResult", ("mean"))
1437
1438            self.mean_ = pd.DataFrame(
1439                np.asarray(self.mean_),
1440                columns=self.series_names,  # self.df_.columns,
1441                index=self.output_dates_,
1442            )
1443
1444            res = DescribeResult(self.mean_)
1445
1446            if self.xreg_ is not None:
1447                if len(self.xreg_.shape) > 1:
1448                    res2 = mx.tuple_map(
1449                        res,
1450                        lambda x: mo.delete_last_columns(
1451                            x, num_columns=self.xreg_.shape[1]
1452                        ),
1453                    )
1454                else:
1455                    res2 = mx.tuple_map(
1456                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
1457                    )
1458                return DescribeResult(res2[0])
1459
1460            return res
1461
1462        # After prediction loop, ensure sims only contain target columns
1463        if self.sims_ is not None:
1464            if self.verbose == 1:
1465                self.sims_ = tuple(
1466                    sim[:h,]  # Only keep target columns and h rows
1467                    for sim in tqdm(self.sims_)
1468                )
1469            elif self.verbose == 0:
1470                self.sims_ = tuple(
1471                    sim[:h,]  # Only keep target columns and h rows
1472                    for sim in self.sims_
1473                )
1474
1475            # Convert numpy arrays to DataFrames with proper columns
1476            self.sims_ = tuple(
1477                pd.DataFrame(
1478                    sim,
1479                    columns=self.df_.columns[: self.init_n_series_],
1480                    index=self.output_dates_,
1481                )
1482                for sim in self.sims_
1483            )
1484
1485        if self.type_pi in (
1486            "kde",
1487            "bootstrap",
1488            "block-bootstrap",
1489            "vine-copula",
1490        ):
1491            if self.xreg_ is not None:
1492                # Use getsimsxreg when external regressors are present
1493                target_cols = self.df_.columns[: self.init_n_series_]
1494                self.sims_ = getsimsxreg(
1495                    self.sims_, self.output_dates_, target_cols
1496                )
1497            else:
1498                # Use original getsims for backward compatibility
1499                self.sims_ = getsims(self.sims_)

Forecast all the time series, h steps ahead

def score( self, X, training_index, testing_index, scoring=None, alpha=0.5, **kwargs):
1566    def score(
1567        self,
1568        X,
1569        training_index,
1570        testing_index,
1571        scoring=None,
1572        alpha=0.5,
1573        **kwargs,
1574    ):
1575        """Train on training_index, score on testing_index."""
1576
1577        assert (
1578            bool(set(training_index).intersection(set(testing_index))) == False
1579        ), "Non-overlapping 'training_index' and 'testing_index' required"
1580
1581        # Dimensions
1582        try:
1583            # multivariate time series
1584            n, p = X.shape
1585        except:
1586            # univariate time series
1587            n = X.shape[0]
1588            p = 1
1589
1590        # Training and testing sets
1591        if p > 1:
1592            X_train = X[training_index, :]
1593            X_test = X[testing_index, :]
1594        else:
1595            X_train = X[training_index]
1596            X_test = X[testing_index]
1597
1598        # Horizon
1599        h = len(testing_index)
1600        assert (
1601            len(training_index) + h
1602        ) <= n, "Please check lengths of training and testing windows"
1603
1604        # Fit and predict
1605        self.fit(X_train, **kwargs)
1606        preds = self.predict(h=h, **kwargs)
1607
1608        if scoring is None:
1609            scoring = "neg_root_mean_squared_error"
1610
1611        if scoring == "pinball":
1612            # Predict requested quantile
1613            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
1614            # Handle multivariate
1615            scores = []
1616            for j in range(p):
1617                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
1618                q_label = (
1619                    f"{int(alpha * 100):02d}"
1620                    if (alpha * 100).is_integer()
1621                    else f"{alpha:.3f}".replace(".", "_")
1622                )
1623                col = f"quantile_{q_label}_{series_name}"
1624                if col not in q_pred.columns:
1625                    raise ValueError(
1626                        f"Column '{col}' not found in quantile forecast output."
1627                    )
1628                y_true_j = X_test[:, j]
1629                y_pred_j = q_pred[col].values
1630                # Compute pinball loss for this series
1631                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
1632                scores.append(loss)
1633            # Return average over series
1634            return np.mean(scores)
1635
1636        if scoring == "crps":
1637            # Ensure simulations exist
1638            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
1639            # Extract simulations: list of DataFrames → (R, h, p)
1640            sims_vals = np.stack(
1641                [sim.values for sim in self.sims_], axis=0
1642            )  # (R, h, p)
1643            crps_scores = []
1644            for j in range(p):
1645                y_true_j = X_test[:, j]
1646                sims_j = sims_vals[:, :, j]  # (R, h)
1647                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
1648                crps_scores.append(np.mean(crps_j))  # average over horizon
1649            return np.mean(crps_scores)  # average over series
1650
1651        # check inputs
1652        assert scoring in (
1653            "explained_variance",
1654            "neg_mean_absolute_error",
1655            "neg_mean_squared_error",
1656            "neg_root_mean_squared_error",
1657            "neg_mean_squared_log_error",
1658            "neg_median_absolute_error",
1659            "r2",
1660        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
1661                               'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
1662                               'neg_median_absolute_error', 'r2')"
1663
1664        scoring_options = {
1665            "explained_variance": skm2.explained_variance_score,
1666            "neg_mean_absolute_error": skm2.mean_absolute_error,
1667            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
1668            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
1669                np.mean((x - y) ** 2)
1670            ),
1671            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
1672            "neg_median_absolute_error": skm2.median_absolute_error,
1673            "r2": skm2.r2_score,
1674        }
1675
1676        return scoring_options[scoring](X_test, preds)

Train on training_index, score on testing_index.

class MTSStacker(nnetsauce.MTS):
 12class MTSStacker(MTS):
 13    """
 14    Sequential stacking for time series with unified strategy.
 15
 16    Core Strategy:
 17    1. Split data: half1 (base models) | half2 (meta-model)
 18    2. Train base models on half1, predict half2
 19    3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]
 20       Stack as additional time series, extract target series
 21    4. Train meta-MTS on half2 with augmented data
 22    5. Retrain base models on half2 for temporal alignment
 23    6. At prediction: base models forecast → augment → meta-model predicts
 24    """
 25
 26    def __init__(
 27        self,
 28        base_models,
 29        meta_model,
 30        split_ratio=0.5,
 31    ):
 32        """
 33        Parameters
 34        ----------
 35        base_models : list of sklearn-compatible models
 36            Base models (e.g., Ridge, Lasso, RandomForest)
 37        meta_model : nnetsauce.MTS instance
 38            MTS with type_pi='scp2-kde' or similar
 39        split_ratio : float
 40            Proportion for half1 (default: 0.5)
 41        """
 42        self.base_models = base_models
 43        self.meta_model = meta_model
 44        self.split_ratio = split_ratio
 45        self.fitted_base_models_ = []
 46        self.split_idx_ = None
 47        self.mean_ = None
 48        self.lower_ = None
 49        self.upper_ = None
 50        self.sims_ = None
 51        self.output_dates_ = None
 52
 53    def fit(self, X, xreg=None, **kwargs):
 54        """
 55        Fit MTSStacker using sequential stacking strategy.
 56
 57        Parameters
 58        ----------
 59        X : array-like or DataFrame, shape (n_samples, n_features)
 60            Training time series (most recent observations last)
 61        xreg : array-like, optional
 62            External regressors
 63        **kwargs : dict
 64            Additional parameters for base and meta models
 65
 66        Returns
 67        -------
 68        self : object
 69        """
 70        # 1. Store attributes and convert to DataFrame if needed
 71        if isinstance(X, pd.DataFrame):
 72            self.df_ = X.copy()
 73            X_array = X.values
 74            self.series_names = X.columns.tolist()
 75        else:
 76            X_array = np.asarray(X)
 77            self.df_ = pd.DataFrame(X_array)
 78            self.series_names = [f"series{i}" for i in range(X_array.shape[1])]
 79
 80        n_samples = X_array.shape[0]
 81        self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1
 82
 83        # 2. Split data into half1 and half2
 84        split_idx = int(n_samples * self.split_ratio)
 85        self.split_idx_ = split_idx
 86
 87        if split_idx < self.meta_model.lags:
 88            raise ValueError(
 89                f"Split creates insufficient data: split_idx={split_idx} < "
 90                f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags."
 91            )
 92
 93        half1 = X_array[:split_idx]
 94        half2 = X_array[split_idx:]
 95
 96        # 3. Train base models on half1 and predict half2
 97        base_preds = []
 98        temp_base_models = []
 99
100        for base_model in self.base_models:
101            # Wrap in MTS with same config as meta_model
102            base_mts = MTS(
103                obj=clone(base_model),
104                lags=self.meta_model.lags,
105                n_hidden_features=self.meta_model.n_hidden_features,
106                replications=self.meta_model.replications,
107                kernel=self.meta_model.kernel,
108                type_pi=None,  # No prediction intervals for base models
109            )
110            base_mts.fit(half1)
111
112            # Predict half2
113            pred = base_mts.predict(h=len(half2))
114
115            # Handle different return types
116            if isinstance(pred, pd.DataFrame):
117                base_preds.append(pred.values)
118            elif isinstance(pred, np.ndarray):
119                base_preds.append(pred)
120            elif hasattr(pred, "mean"):
121                # Named tuple with mean attribute
122                mean_pred = pred.mean
123                base_preds.append(
124                    mean_pred.values
125                    if isinstance(mean_pred, pd.DataFrame)
126                    else mean_pred
127                )
128            else:
129                raise ValueError(f"Unexpected prediction type: {type(pred)}")
130
131            temp_base_models.append(base_mts)
132
133        # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
134        base_preds_array = np.hstack(
135            base_preds
136        )  # shape: (len(half2), n_series * n_base_models)
137
138        if isinstance(X, pd.DataFrame):
139            half2_df = pd.DataFrame(
140                half2,
141                index=self.df_.index[split_idx:],
142                columns=self.series_names,
143            )
144            base_preds_df = pd.DataFrame(
145                base_preds_array,
146                index=self.df_.index[split_idx:],
147                columns=[
148                    f"base_{i}_{j}"
149                    for i in range(len(self.base_models))
150                    for j in range(self.n_series_)
151                ],
152            )
153            augmented = pd.concat([half2_df, base_preds_df], axis=1)
154        else:
155            augmented = np.hstack([half2, base_preds_array])
156
157        # 5. Train meta-model on augmented half2
158        self.meta_model.fit(augmented, xreg=xreg, **kwargs)
159
160        # Store meta-model attributes
161        self.output_dates_ = self.meta_model.output_dates_
162        self.fit_objs_ = self.meta_model.fit_objs_
163        self.y_ = self.meta_model.y_
164        self.X_ = self.meta_model.X_
165        self.xreg_ = self.meta_model.xreg_
166        self.y_means_ = self.meta_model.y_means_
167        self.residuals_ = self.meta_model.residuals_
168
169        # 6. FIXED: Retrain base models on half2 for temporal alignment
170        self.fitted_base_models_ = []
171        for i, base_model in enumerate(self.base_models):
172            base_mts_final = MTS(
173                obj=clone(base_model),
174                lags=self.meta_model.lags,
175                n_hidden_features=self.meta_model.n_hidden_features,
176                replications=self.meta_model.replications,
177                kernel=self.meta_model.kernel,
178                type_pi=None,
179            )
180            base_mts_final.fit(half2)
181            self.fitted_base_models_.append(base_mts_final)
182
183        return self
184
185    def predict(self, h=5, level=95, **kwargs):
186        """
187        Forecast h steps ahead using stacked predictions.
188
189        FIXED: Now properly generates base model forecasts and uses them
190        to create augmented features for the meta-model.
191
192        Parameters
193        ----------
194        h : int
195            Forecast horizon
196        level : int
197            Confidence level for prediction intervals
198        **kwargs : dict
199            Additional parameters for prediction
200
201        Returns
202        -------
203        DescribeResult or DataFrame
204            Predictions with optional intervals/simulations
205        """
206        # Step 1: Generate base model forecasts for horizon h
207        base_forecasts = []
208
209        for base_mts in self.fitted_base_models_:
210            # Each base model forecasts h steps ahead
211            forecast = base_mts.predict(h=h)
212
213            # Extract mean prediction
214            if isinstance(forecast, pd.DataFrame):
215                base_forecasts.append(forecast.values)
216            elif isinstance(forecast, np.ndarray):
217                base_forecasts.append(forecast)
218            elif hasattr(forecast, "mean"):
219                mean_pred = forecast.mean
220                base_forecasts.append(
221                    mean_pred.values
222                    if isinstance(mean_pred, pd.DataFrame)
223                    else mean_pred
224                )
225            else:
226                raise ValueError(f"Unexpected forecast type: {type(forecast)}")
227
228        # Step 2: Stack base forecasts into augmented features
229        base_forecasts_array = np.hstack(
230            base_forecasts
231        )  # shape: (h, n_series * n_base)
232
233        # Step 3: Create augmented input for meta-model
234        # The meta-model needs the original series structure + base predictions
235        # We use recursive forecasting: predict one step, update history, repeat
236
237        # Get last window of data from training
238        last_window = self.df_.iloc[-self.meta_model.lags:].values
239
240        # Initialize containers for results
241        all_forecasts = []
242        all_lowers = [] if level is not None else None
243        all_uppers = [] if level is not None else None
244        all_sims = (
245            []
246            if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi
247            else None
248        )
249
250        # Recursive forecasting
251        current_window = last_window.copy()
252
253        for step in range(h):
254            # Create augmented input: [current_window_last_row | base_forecast_step]
255            # Note: meta-model was trained on [original | base_preds]
256            # For prediction, we need to simulate this structure
257
258            # Use the base forecast for this step
259            base_forecast_step = base_forecasts_array[
260                step: step + 1, :
261            ]  # shape: (1, n_base_features)
262
263            # Create a dummy augmented dataset for this step
264            # Combine last observed values with base predictions
265            last_obs = current_window[-1:, :]  # shape: (1, n_series)
266            augmented_step = np.hstack([last_obs, base_forecast_step])
267
268            # Convert to DataFrame if needed
269            if isinstance(self.df_, pd.DataFrame):
270                augmented_df = pd.DataFrame(
271                    augmented_step,
272                    columns=(
273                        self.series_names
274                        + [
275                            f"base_{i}_{j}"
276                            for i in range(len(self.base_models))
277                            for j in range(self.n_series_)
278                        ]
279                    ),
280                )
281            else:
282                augmented_df = augmented_step
283
284            # Predict one step with meta-model
285            # This is tricky: we need to use meta-model's internal predict
286            # but with our augmented data structure
287
288            # For now, use the standard predict and extract one step
289            step_result = self.meta_model.predict(h=1, level=level, **kwargs)
290
291            # Extract forecasts
292            if isinstance(step_result, pd.DataFrame):
293                forecast_step = step_result.iloc[0, : self.n_series_].values
294                all_forecasts.append(forecast_step)
295            elif isinstance(step_result, np.ndarray):
296                forecast_step = step_result[0, : self.n_series_]
297                all_forecasts.append(forecast_step)
298            elif hasattr(step_result, "mean"):
299                mean_pred = step_result.mean
300                if isinstance(mean_pred, pd.DataFrame):
301                    forecast_step = mean_pred.iloc[0, : self.n_series_].values
302                else:
303                    forecast_step = mean_pred[0, : self.n_series_]
304                all_forecasts.append(forecast_step)
305
306                # Extract intervals if available
307                if hasattr(step_result, "lower") and all_lowers is not None:
308                    lower_pred = step_result.lower
309                    if isinstance(lower_pred, pd.DataFrame):
310                        all_lowers.append(
311                            lower_pred.iloc[0, : self.n_series_].values
312                        )
313                    else:
314                        all_lowers.append(lower_pred[0, : self.n_series_])
315
316                if hasattr(step_result, "upper") and all_uppers is not None:
317                    upper_pred = step_result.upper
318                    if isinstance(upper_pred, pd.DataFrame):
319                        all_uppers.append(
320                            upper_pred.iloc[0, : self.n_series_].values
321                        )
322                    else:
323                        all_uppers.append(upper_pred[0, : self.n_series_])
324
325                # Extract simulations if available
326                if hasattr(step_result, "sims") and all_sims is not None:
327                    all_sims.append(step_result.sims)
328
329            # Update window for next iteration
330            current_window = np.vstack(
331                [current_window[1:], forecast_step.reshape(1, -1)]
332            )
333
334        # Combine all forecasts
335        forecasts_array = np.array(all_forecasts)
336
337        # Create output dates
338        if hasattr(self.df_, "index") and isinstance(
339            self.df_.index, pd.DatetimeIndex
340        ):
341            last_date = self.df_.index[-1]
342            freq = pd.infer_freq(self.df_.index)
343            if freq:
344                output_dates = pd.date_range(
345                    start=last_date, periods=h + 1, freq=freq
346                )[1:]
347            else:
348                output_dates = pd.RangeIndex(
349                    start=len(self.df_), stop=len(self.df_) + h
350                )
351        else:
352            output_dates = pd.RangeIndex(
353                start=len(self.df_), stop=len(self.df_) + h
354            )
355
356        self.output_dates_ = output_dates
357
358        # Format output
359        mean_df = pd.DataFrame(
360            forecasts_array,
361            index=output_dates,
362            columns=self.series_names[: self.n_series_],
363        )
364        self.mean_ = mean_df
365
366        # Return based on what was computed
367        if all_lowers and all_uppers:
368            lowers_array = np.array(all_lowers)
369            uppers_array = np.array(all_uppers)
370
371            lower_df = pd.DataFrame(
372                lowers_array,
373                index=output_dates,
374                columns=self.series_names[: self.n_series_],
375            )
376            upper_df = pd.DataFrame(
377                uppers_array,
378                index=output_dates,
379                columns=self.series_names[: self.n_series_],
380            )
381
382            self.lower_ = lower_df
383            self.upper_ = upper_df
384
385            if all_sims:
386                self.sims_ = tuple(all_sims)
387                DescribeResult = namedtuple(
388                    "DescribeResult", ("mean", "sims", "lower", "upper")
389                )
390                return DescribeResult(mean_df, self.sims_, lower_df, upper_df)
391            else:
392                DescribeResult = namedtuple(
393                    "DescribeResult", ("mean", "lower", "upper")
394                )
395                return DescribeResult(mean_df, lower_df, upper_df)
396        else:
397            return mean_df
398
399    def plot(self, series=None, **kwargs):
400        """
401        Plot the time series with forecasts and prediction intervals.
402
403        Parameters
404        ----------
405        series : str or int, optional
406            Name or index of the series to plot (default: 0)
407        **kwargs : dict
408            Additional parameters for plotting
409        """
410        # Ensure we have predictions
411        if self.mean_ is None:
412            raise ValueError(
413                "Model forecasting must be obtained first (call predict)"
414            )
415
416        # Convert series name to index if needed
417        if isinstance(series, str):
418            if series in self.series_names:
419                series_idx = self.series_names.index(series)
420            else:
421                raise ValueError(
422                    f"Series '{series}' doesn't exist in the input dataset"
423                )
424        else:
425            series_idx = series if series is not None else 0
426
427        # Check bounds
428        if series_idx < 0 or series_idx >= self.n_series_:
429            raise ValueError(
430                f"Series index {series_idx} is out of bounds (0 to {self.n_series_ - 1})"
431            )
432
433        # Prepare data for plotting
434        import matplotlib.pyplot as plt
435        import matplotlib.dates as mdates
436
437        # Get historical data
438        historical_data = self.df_.iloc[:, series_idx]
439        forecast_data = self.mean_.iloc[:, series_idx]
440
441        # Get prediction intervals if available
442        has_intervals = self.lower_ is not None and self.upper_ is not None
443        if has_intervals:
444            lower_data = self.lower_.iloc[:, series_idx]
445            upper_data = self.upper_.iloc[:, series_idx]
446
447        # Create figure
448        fig, ax = plt.subplots(figsize=(12, 6))
449
450        # Plot historical data
451        if isinstance(self.df_.index, pd.DatetimeIndex):
452            hist_index = self.df_.index
453            ax.plot(
454                hist_index,
455                historical_data,
456                "-",
457                label="Historical",
458                color="blue",
459                linewidth=1.5,
460            )
461
462            # Plot forecast
463            forecast_index = self.mean_.index
464            ax.plot(
465                forecast_index,
466                forecast_data,
467                "-",
468                label="Forecast",
469                color="red",
470                linewidth=1.5,
471            )
472
473            # Plot prediction intervals
474            if has_intervals:
475                ax.fill_between(
476                    forecast_index,
477                    lower_data,
478                    upper_data,
479                    alpha=0.3,
480                    color="red",
481                    label="Prediction Interval",
482                )
483
484            # Add vertical line at the split point
485            if self.split_idx_ is not None:
486                split_date = hist_index[self.split_idx_]
487                ax.axvline(
488                    x=split_date,
489                    color="gray",
490                    linestyle="--",
491                    alpha=0.5,
492                    label="Train Split",
493                )
494
495            # Format x-axis for dates
496            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
497            fig.autofmt_xdate()
498        else:
499            # Numeric indices
500            n_points_train = len(self.df_)
501            n_points_forecast = len(self.mean_)
502
503            x_hist = np.arange(n_points_train)
504            x_forecast = np.arange(
505                n_points_train, n_points_train + n_points_forecast
506            )
507
508            ax.plot(
509                x_hist,
510                historical_data,
511                "-",
512                label="Historical",
513                color="blue",
514                linewidth=1.5,
515            )
516            ax.plot(
517                x_forecast,
518                forecast_data,
519                "-",
520                label="Forecast",
521                color="red",
522                linewidth=1.5,
523            )
524
525            if has_intervals:
526                ax.fill_between(
527                    x_forecast,
528                    lower_data,
529                    upper_data,
530                    alpha=0.3,
531                    color="red",
532                    label="Prediction Interval",
533                )
534
535            if self.split_idx_ is not None:
536                ax.axvline(
537                    x=self.split_idx_,
538                    color="gray",
539                    linestyle="--",
540                    alpha=0.5,
541                    label="Train Split",
542                )
543
544        # Set title and labels
545        series_name = (
546            self.series_names[series_idx]
547            if series_idx < len(self.series_names)
548            else f"Series {series_idx}"
549        )
550        plt.title(f"Forecast for {series_name}", fontsize=14, fontweight="bold")
551        plt.xlabel("Time")
552        plt.ylabel("Value")
553        plt.legend()
554        plt.grid(True, alpha=0.3)
555        plt.tight_layout()
556        plt.show()

Sequential stacking for time series with unified strategy.

Core Strategy:

  1. Split data: half1 (base models) | half2 (meta-model)
  2. Train base models on half1, predict half2
  3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]. Stack the base predictions as additional time series, then extract the target series
  4. Train meta-MTS on half2 with augmented data
  5. Retrain base models on half2 for temporal alignment
  6. At prediction: base models forecast → augment → meta-model predicts
def fit(self, X, xreg=None, **kwargs):
 53    def fit(self, X, xreg=None, **kwargs):
 54        """
 55        Fit MTSStacker using sequential stacking strategy.
 56
 57        Parameters
 58        ----------
 59        X : array-like or DataFrame, shape (n_samples, n_features)
 60            Training time series (most recent observations last)
 61        xreg : array-like, optional
 62            External regressors
 63        **kwargs : dict
 64            Additional parameters for base and meta models
 65
 66        Returns
 67        -------
 68        self : object
 69        """
 70        # 1. Store attributes and convert to DataFrame if needed
 71        if isinstance(X, pd.DataFrame):
 72            self.df_ = X.copy()
 73            X_array = X.values
 74            self.series_names = X.columns.tolist()
 75        else:
 76            X_array = np.asarray(X)
 77            self.df_ = pd.DataFrame(X_array)
 78            self.series_names = [f"series{i}" for i in range(X_array.shape[1])]
 79
 80        n_samples = X_array.shape[0]
 81        self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1
 82
 83        # 2. Split data into half1 and half2
 84        split_idx = int(n_samples * self.split_ratio)
 85        self.split_idx_ = split_idx
 86
 87        if split_idx < self.meta_model.lags:
 88            raise ValueError(
 89                f"Split creates insufficient data: split_idx={split_idx} < "
 90                f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags."
 91            )
 92
 93        half1 = X_array[:split_idx]
 94        half2 = X_array[split_idx:]
 95
 96        # 3. Train base models on half1 and predict half2
 97        base_preds = []
 98        temp_base_models = []
 99
100        for base_model in self.base_models:
101            # Wrap in MTS with same config as meta_model
102            base_mts = MTS(
103                obj=clone(base_model),
104                lags=self.meta_model.lags,
105                n_hidden_features=self.meta_model.n_hidden_features,
106                replications=self.meta_model.replications,
107                kernel=self.meta_model.kernel,
108                type_pi=None,  # No prediction intervals for base models
109            )
110            base_mts.fit(half1)
111
112            # Predict half2
113            pred = base_mts.predict(h=len(half2))
114
115            # Handle different return types
116            if isinstance(pred, pd.DataFrame):
117                base_preds.append(pred.values)
118            elif isinstance(pred, np.ndarray):
119                base_preds.append(pred)
120            elif hasattr(pred, "mean"):
121                # Named tuple with mean attribute
122                mean_pred = pred.mean
123                base_preds.append(
124                    mean_pred.values
125                    if isinstance(mean_pred, pd.DataFrame)
126                    else mean_pred
127                )
128            else:
129                raise ValueError(f"Unexpected prediction type: {type(pred)}")
130
131            temp_base_models.append(base_mts)
132
133        # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
134        base_preds_array = np.hstack(
135            base_preds
136        )  # shape: (len(half2), n_series * n_base_models)
137
138        if isinstance(X, pd.DataFrame):
139            half2_df = pd.DataFrame(
140                half2,
141                index=self.df_.index[split_idx:],
142                columns=self.series_names,
143            )
144            base_preds_df = pd.DataFrame(
145                base_preds_array,
146                index=self.df_.index[split_idx:],
147                columns=[
148                    f"base_{i}_{j}"
149                    for i in range(len(self.base_models))
150                    for j in range(self.n_series_)
151                ],
152            )
153            augmented = pd.concat([half2_df, base_preds_df], axis=1)
154        else:
155            augmented = np.hstack([half2, base_preds_array])
156
157        # 5. Train meta-model on augmented half2
158        self.meta_model.fit(augmented, xreg=xreg, **kwargs)
159
160        # Store meta-model attributes
161        self.output_dates_ = self.meta_model.output_dates_
162        self.fit_objs_ = self.meta_model.fit_objs_
163        self.y_ = self.meta_model.y_
164        self.X_ = self.meta_model.X_
165        self.xreg_ = self.meta_model.xreg_
166        self.y_means_ = self.meta_model.y_means_
167        self.residuals_ = self.meta_model.residuals_
168
169        # 6. FIXED: Retrain base models on half2 for temporal alignment
170        self.fitted_base_models_ = []
171        for i, base_model in enumerate(self.base_models):
172            base_mts_final = MTS(
173                obj=clone(base_model),
174                lags=self.meta_model.lags,
175                n_hidden_features=self.meta_model.n_hidden_features,
176                replications=self.meta_model.replications,
177                kernel=self.meta_model.kernel,
178                type_pi=None,
179            )
180            base_mts_final.fit(half2)
181            self.fitted_base_models_.append(base_mts_final)
182
183        return self

Fit MTSStacker using sequential stacking strategy.

Parameters

X : array-like or DataFrame, shape (n_samples, n_features)
    Training time series (most recent observations last).

xreg : array-like, optional
    External regressors, passed to the meta-model's fit.

**kwargs : dict
    Additional parameters forwarded to the meta-model's fit (the base models are fitted without extra parameters).

Returns

self : object

def predict(self, h=5, level=95, **kwargs):
185    def predict(self, h=5, level=95, **kwargs):
186        """
187        Forecast h steps ahead using stacked predictions.
188
189        FIXED: Now properly generates base model forecasts and uses them
190        to create augmented features for the meta-model.
191
192        Parameters
193        ----------
194        h : int
195            Forecast horizon
196        level : int
197            Confidence level for prediction intervals
198        **kwargs : dict
199            Additional parameters for prediction
200
201        Returns
202        -------
203        DescribeResult or DataFrame
204            Predictions with optional intervals/simulations
205        """
206        # Step 1: Generate base model forecasts for horizon h
207        base_forecasts = []
208
209        for base_mts in self.fitted_base_models_:
210            # Each base model forecasts h steps ahead
211            forecast = base_mts.predict(h=h)
212
213            # Extract mean prediction
214            if isinstance(forecast, pd.DataFrame):
215                base_forecasts.append(forecast.values)
216            elif isinstance(forecast, np.ndarray):
217                base_forecasts.append(forecast)
218            elif hasattr(forecast, "mean"):
219                mean_pred = forecast.mean
220                base_forecasts.append(
221                    mean_pred.values
222                    if isinstance(mean_pred, pd.DataFrame)
223                    else mean_pred
224                )
225            else:
226                raise ValueError(f"Unexpected forecast type: {type(forecast)}")
227
228        # Step 2: Stack base forecasts into augmented features
229        base_forecasts_array = np.hstack(
230            base_forecasts
231        )  # shape: (h, n_series * n_base)
232
233        # Step 3: Create augmented input for meta-model
234        # The meta-model needs the original series structure + base predictions
235        # We use recursive forecasting: predict one step, update history, repeat
236
237        # Get last window of data from training
238        last_window = self.df_.iloc[-self.meta_model.lags:].values
239
240        # Initialize containers for results
241        all_forecasts = []
242        all_lowers = [] if level is not None else None
243        all_uppers = [] if level is not None else None
244        all_sims = (
245            []
246            if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi
247            else None
248        )
249
250        # Recursive forecasting
251        current_window = last_window.copy()
252
253        for step in range(h):
254            # Create augmented input: [current_window_last_row | base_forecast_step]
255            # Note: meta-model was trained on [original | base_preds]
256            # For prediction, we need to simulate this structure
257
258            # Use the base forecast for this step
259            base_forecast_step = base_forecasts_array[
260                step: step + 1, :
261            ]  # shape: (1, n_base_features)
262
263            # Create a dummy augmented dataset for this step
264            # Combine last observed values with base predictions
265            last_obs = current_window[-1:, :]  # shape: (1, n_series)
266            augmented_step = np.hstack([last_obs, base_forecast_step])
267
268            # Convert to DataFrame if needed
269            if isinstance(self.df_, pd.DataFrame):
270                augmented_df = pd.DataFrame(
271                    augmented_step,
272                    columns=(
273                        self.series_names
274                        + [
275                            f"base_{i}_{j}"
276                            for i in range(len(self.base_models))
277                            for j in range(self.n_series_)
278                        ]
279                    ),
280                )
281            else:
282                augmented_df = augmented_step
283
284            # Predict one step with meta-model
285            # This is tricky: we need to use meta-model's internal predict
286            # but with our augmented data structure
287
288            # For now, use the standard predict and extract one step
289            step_result = self.meta_model.predict(h=1, level=level, **kwargs)
290
291            # Extract forecasts
292            if isinstance(step_result, pd.DataFrame):
293                forecast_step = step_result.iloc[0, : self.n_series_].values
294                all_forecasts.append(forecast_step)
295            elif isinstance(step_result, np.ndarray):
296                forecast_step = step_result[0, : self.n_series_]
297                all_forecasts.append(forecast_step)
298            elif hasattr(step_result, "mean"):
299                mean_pred = step_result.mean
300                if isinstance(mean_pred, pd.DataFrame):
301                    forecast_step = mean_pred.iloc[0, : self.n_series_].values
302                else:
303                    forecast_step = mean_pred[0, : self.n_series_]
304                all_forecasts.append(forecast_step)
305
306                # Extract intervals if available
307                if hasattr(step_result, "lower") and all_lowers is not None:
308                    lower_pred = step_result.lower
309                    if isinstance(lower_pred, pd.DataFrame):
310                        all_lowers.append(
311                            lower_pred.iloc[0, : self.n_series_].values
312                        )
313                    else:
314                        all_lowers.append(lower_pred[0, : self.n_series_])
315
316                if hasattr(step_result, "upper") and all_uppers is not None:
317                    upper_pred = step_result.upper
318                    if isinstance(upper_pred, pd.DataFrame):
319                        all_uppers.append(
320                            upper_pred.iloc[0, : self.n_series_].values
321                        )
322                    else:
323                        all_uppers.append(upper_pred[0, : self.n_series_])
324
325                # Extract simulations if available
326                if hasattr(step_result, "sims") and all_sims is not None:
327                    all_sims.append(step_result.sims)
328
329            # Update window for next iteration
330            current_window = np.vstack(
331                [current_window[1:], forecast_step.reshape(1, -1)]
332            )
333
334        # Combine all forecasts
335        forecasts_array = np.array(all_forecasts)
336
337        # Create output dates
338        if hasattr(self.df_, "index") and isinstance(
339            self.df_.index, pd.DatetimeIndex
340        ):
341            last_date = self.df_.index[-1]
342            freq = pd.infer_freq(self.df_.index)
343            if freq:
344                output_dates = pd.date_range(
345                    start=last_date, periods=h + 1, freq=freq
346                )[1:]
347            else:
348                output_dates = pd.RangeIndex(
349                    start=len(self.df_), stop=len(self.df_) + h
350                )
351        else:
352            output_dates = pd.RangeIndex(
353                start=len(self.df_), stop=len(self.df_) + h
354            )
355
356        self.output_dates_ = output_dates
357
358        # Format output
359        mean_df = pd.DataFrame(
360            forecasts_array,
361            index=output_dates,
362            columns=self.series_names[: self.n_series_],
363        )
364        self.mean_ = mean_df
365
366        # Return based on what was computed
367        if all_lowers and all_uppers:
368            lowers_array = np.array(all_lowers)
369            uppers_array = np.array(all_uppers)
370
371            lower_df = pd.DataFrame(
372                lowers_array,
373                index=output_dates,
374                columns=self.series_names[: self.n_series_],
375            )
376            upper_df = pd.DataFrame(
377                uppers_array,
378                index=output_dates,
379                columns=self.series_names[: self.n_series_],
380            )
381
382            self.lower_ = lower_df
383            self.upper_ = upper_df
384
385            if all_sims:
386                self.sims_ = tuple(all_sims)
387                DescribeResult = namedtuple(
388                    "DescribeResult", ("mean", "sims", "lower", "upper")
389                )
390                return DescribeResult(mean_df, self.sims_, lower_df, upper_df)
391            else:
392                DescribeResult = namedtuple(
393                    "DescribeResult", ("mean", "lower", "upper")
394                )
395                return DescribeResult(mean_df, lower_df, upper_df)
396        else:
397            return mean_df

Forecast h steps ahead using stacked predictions.

FIXED: Now properly generates base model forecasts and uses them to create augmented features for the meta-model.

Parameters

h : int
    Forecast horizon.
level : int
    Confidence level for prediction intervals.
**kwargs : dict
    Additional parameters for prediction.

Returns

DescribeResult or DataFrame
    Predictions with optional intervals/simulations.

class MultiOutputMTS(nnetsauce.MTS):
 14class MultiOutputMTS(MTS):
 15    """MTS subclass optimized for multivariate time series with vectorized models
 16
 17    Enforces n_series >= 2 and uses single vectorized fit call instead of per-series loop.
 18    Works with sklearn models supporting multi-output (Ridge, Lasso, LinearRegression, etc.)
 19    """
 20
 21    def fit(self, X, xreg=None, **kwargs):
 22        """Fit with vectorized multi-output model - requires n_series >= 2"""
 23
 24        # Validate multivariate input
 25        try:
 26            self.init_n_series_ = X.shape[1]
 27        except IndexError:
 28            raise ValueError(
 29                "MultiOutputMTS requires multivariate input (n_samples, n_series)"
 30            )
 31
 32        if self.init_n_series_ < 2:
 33            raise ValueError(
 34                f"MultiOutputMTS requires at least 2 series, got {self.init_n_series_}"
 35            )
 36
 37        # Automatic lag selection if requested (copied from parent)
 38        if isinstance(self.lags, str):
 39            max_lags = min(25, X.shape[0] // 4)
 40            best_ic = float("inf")
 41            best_lags = 1
 42
 43            if self.verbose:
 44                print(
 45                    f"\nSelecting optimal number of lags using {self.lags}..."
 46                )
 47                iterator = tqdm(range(1, max_lags + 1))
 48            else:
 49                iterator = range(1, max_lags + 1)
 50
 51            for lag in iterator:
 52                if isinstance(X, pd.DataFrame):
 53                    X_values = X.values[::-1]
 54                else:
 55                    X_values = X[::-1]
 56
 57                mts_input = ts.create_train_inputs(X_values, lag)
 58                dummy_y, scaled_Z = self.cook_training_set(
 59                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 60                )
 61
 62                # Vectorized fit for lag selection
 63                y_means = np.mean(mts_input[0], axis=0)
 64                centered_y = mts_input[0] - y_means[np.newaxis, :]
 65                self.obj.fit(X=scaled_Z, y=centered_y)
 66                residuals = centered_y - self.obj.predict(scaled_Z)
 67                self.residuals_ = residuals  # Keep (n_obs, n_series) shape
 68
 69                ic = self._compute_information_criterion(
 70                    curr_lags=lag, criterion=self.lags
 71                )
 72
 73                if self.verbose:
 74                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 75
 76                if ic < best_ic:
 77                    best_ic = ic
 78                    best_lags = lag
 79
 80            if self.verbose:
 81                print(
 82                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 83                )
 84
 85            self.lags = best_lags
 86
 87        # Data preprocessing (from parent)
 88        self.input_dates = None
 89        self.df_ = None
 90
 91        if isinstance(X, pd.DataFrame) is False:
 92            if xreg is None:
 93                X = pd.DataFrame(X)
 94                self.series_names = [
 95                    "series" + str(i) for i in range(X.shape[1])
 96                ]
 97            else:
 98                X = mo.cbind(X, xreg)
 99                self.xreg_ = xreg
100        else:
101            X_index = None
102            if X.index is not None:
103                X_index = X.index
104            if xreg is None:
105                X = copy.deepcopy(mo.convert_df_to_numeric(X))
106            else:
107                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
108                self.xreg_ = xreg
109            if X_index is not None:
110                X.index = X_index
111            self.series_names = X.columns.tolist()
112
113        if isinstance(X, pd.DataFrame):
114            if self.df_ is None:
115                self.df_ = X
116                X = X.values
117            else:
118                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
119                frequency = pd.infer_freq(input_dates_prev)
120                self.df_ = pd.concat([self.df_, X], axis=0)
121                self.input_dates = pd.date_range(
122                    start=input_dates_prev[0],
123                    periods=len(input_dates_prev) + X.shape[0],
124                    freq=frequency,
125                ).values.tolist()
126                self.df_.index = self.input_dates
127                X = self.df_.values
128            self.df_.columns = self.series_names
129        else:
130            if self.df_ is None:
131                self.df_ = pd.DataFrame(X, columns=self.series_names)
132            else:
133                self.df_ = pd.concat(
134                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
135                    axis=0,
136                )
137
138        self.input_dates = ts.compute_input_dates(self.df_)
139
140        n, p = X.shape
141        self.n_obs_ = n
142        rep_1_n = np.repeat(1, n)
143
144        self.y_ = None
145        self.X_ = None
146        self.n_series = p
147        self.fit_objs_.clear()
148        self.y_means_.clear()
149        self.residuals_ = None
150        self.residuals_sims_ = None
151        self.kde_ = None
152        self.sims_ = None
153        self.scaled_Z_ = None
154        self.centered_y_is_ = []
155
156        # Create training inputs
157        mts_input = ts.create_train_inputs(X[::-1], self.lags)
158        self.y_ = mts_input[0]
159        self.X_ = mts_input[1]
160
161        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
162        self.scaled_Z_ = scaled_Z
163
164        if self.verbose > 0:
165            print(
166                f"\n Adjusting {type(self.obj).__name__} to multivariate time series (vectorized)... \n"
167            )
168
169        # VECTORIZED FITTING - NO LOOP
170        y_means_array = np.array(
171            [np.mean(self.y_[:, i]) for i in range(self.init_n_series_)]
172        )
173        for i in range(self.init_n_series_):
174            self.y_means_[i] = y_means_array[i]
175
176        centered_y_all = self.y_ - y_means_array[np.newaxis, :]
177        self.centered_y_is_ = [
178            centered_y_all[:, i] for i in range(self.init_n_series_)
179        ]
180
181        # Single vectorized fit for all series
182        self.obj.fit(scaled_Z, centered_y_all)
183
184        # All series share the same model
185        for i in range(self.init_n_series_):
186            self.fit_objs_[i] = self.obj
187
188        # Vectorized residuals - ONLY target columns (n_obs, n_series)
189        preds_all = self.obj.predict(scaled_Z)
190        residuals_raw = centered_y_all - preds_all
191
192        # CRITICAL: Ensure residuals only have n_series columns, not all scaled_Z columns
193        # In case there's some dimension mismatch, explicitly slice
194        self.residuals_ = residuals_raw[:, : self.init_n_series_]
195
196        # Handle type_pi
197        if self.type_pi == "gaussian":
198            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
199
200        if self.type_pi.startswith("scp2"):
201            data_mean = np.mean(self.residuals_, axis=0)
202            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
203            self.residuals_ = (
204                self.residuals_ - data_mean[np.newaxis, :]
205            ) / self.residuals_std_dev_[np.newaxis, :]
206
207        if self.replications is not None and "kde" in self.type_pi:
208            if self.verbose > 0:
209                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
210            assert self.kernel in (
211                "gaussian",
212                "tophat",
213            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
214            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
215            grid = GridSearchCV(
216                KernelDensity(kernel=self.kernel, **kwargs),
217                param_grid=kernel_bandwidths,
218            )
219            grid.fit(self.residuals_)
220            if self.verbose > 0:
221                print(
222                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
223                )
224            self.kde_ = grid.best_estimator_
225
226        return self
227
228    def predict(self, h=5, level=95, quantiles=None, **kwargs):
229        """Override predict to handle vectorized model predictions"""
230
231        # Delegate to parent for quantiles and multiple levels
232        if quantiles is not None or isinstance(level, (list, np.ndarray)):
233            return super().predict(
234                h=h, level=level, quantiles=quantiles, **kwargs
235            )
236
237        # Store original obj temporarily
238        original_obj = self.obj
239
240        # Create wrapper that extracts the i-th output for each series
241        class VectorizedWrapper:
242            def __init__(self, model, series_idx):
243                self.model = model
244                self.series_idx = series_idx
245
246            def predict(self, X, **kw):
247                """Predict and return only the output for this series index"""
248                preds = self.model.predict(X, **kw)
249                # preds shape: (n_samples, n_series) or (n_series,)
250                if len(preds.shape) == 1:
251                    # Single prediction: (n_series,)
252                    return preds[self.series_idx: self.series_idx + 1]
253                else:
254                    # Multiple predictions: (n_samples, n_series)
255                    return preds[
256                        :, self.series_idx: self.series_idx + 1
257                    ].flatten()
258
259        # Wrap each series with its own index
260        for i in range(self.init_n_series_):
261            self.fit_objs_[i] = VectorizedWrapper(original_obj, i)
262
263        try:
264            result = super().predict(
265                h=h, level=level, quantiles=quantiles, **kwargs
266            )
267        finally:
268            # Restore original
269            for i in range(self.init_n_series_):
270                self.fit_objs_[i] = original_obj
271
272        return result

MTS subclass optimized for multivariate time series with vectorized models

Enforces n_series >= 2 and uses single vectorized fit call instead of per-series loop. Works with sklearn models supporting multi-output (Ridge, Lasso, LinearRegression, etc.)

def fit(self, X, xreg=None, **kwargs):
 21    def fit(self, X, xreg=None, **kwargs):
 22        """Fit with vectorized multi-output model - requires n_series >= 2"""
 23
 24        # Validate multivariate input
 25        try:
 26            self.init_n_series_ = X.shape[1]
 27        except IndexError:
 28            raise ValueError(
 29                "MultiOutputMTS requires multivariate input (n_samples, n_series)"
 30            )
 31
 32        if self.init_n_series_ < 2:
 33            raise ValueError(
 34                f"MultiOutputMTS requires at least 2 series, got {self.init_n_series_}"
 35            )
 36
 37        # Automatic lag selection if requested (copied from parent)
 38        if isinstance(self.lags, str):
 39            max_lags = min(25, X.shape[0] // 4)
 40            best_ic = float("inf")
 41            best_lags = 1
 42
 43            if self.verbose:
 44                print(
 45                    f"\nSelecting optimal number of lags using {self.lags}..."
 46                )
 47                iterator = tqdm(range(1, max_lags + 1))
 48            else:
 49                iterator = range(1, max_lags + 1)
 50
 51            for lag in iterator:
 52                if isinstance(X, pd.DataFrame):
 53                    X_values = X.values[::-1]
 54                else:
 55                    X_values = X[::-1]
 56
 57                mts_input = ts.create_train_inputs(X_values, lag)
 58                dummy_y, scaled_Z = self.cook_training_set(
 59                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
 60                )
 61
 62                # Vectorized fit for lag selection
 63                y_means = np.mean(mts_input[0], axis=0)
 64                centered_y = mts_input[0] - y_means[np.newaxis, :]
 65                self.obj.fit(X=scaled_Z, y=centered_y)
 66                residuals = centered_y - self.obj.predict(scaled_Z)
 67                self.residuals_ = residuals  # Keep (n_obs, n_series) shape
 68
 69                ic = self._compute_information_criterion(
 70                    curr_lags=lag, criterion=self.lags
 71                )
 72
 73                if self.verbose:
 74                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")
 75
 76                if ic < best_ic:
 77                    best_ic = ic
 78                    best_lags = lag
 79
 80            if self.verbose:
 81                print(
 82                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
 83                )
 84
 85            self.lags = best_lags
 86
 87        # Data preprocessing (from parent)
 88        self.input_dates = None
 89        self.df_ = None
 90
 91        if isinstance(X, pd.DataFrame) is False:
 92            if xreg is None:
 93                X = pd.DataFrame(X)
 94                self.series_names = [
 95                    "series" + str(i) for i in range(X.shape[1])
 96                ]
 97            else:
 98                X = mo.cbind(X, xreg)
 99                self.xreg_ = xreg
100        else:
101            X_index = None
102            if X.index is not None:
103                X_index = X.index
104            if xreg is None:
105                X = copy.deepcopy(mo.convert_df_to_numeric(X))
106            else:
107                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
108                self.xreg_ = xreg
109            if X_index is not None:
110                X.index = X_index
111            self.series_names = X.columns.tolist()
112
113        if isinstance(X, pd.DataFrame):
114            if self.df_ is None:
115                self.df_ = X
116                X = X.values
117            else:
118                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
119                frequency = pd.infer_freq(input_dates_prev)
120                self.df_ = pd.concat([self.df_, X], axis=0)
121                self.input_dates = pd.date_range(
122                    start=input_dates_prev[0],
123                    periods=len(input_dates_prev) + X.shape[0],
124                    freq=frequency,
125                ).values.tolist()
126                self.df_.index = self.input_dates
127                X = self.df_.values
128            self.df_.columns = self.series_names
129        else:
130            if self.df_ is None:
131                self.df_ = pd.DataFrame(X, columns=self.series_names)
132            else:
133                self.df_ = pd.concat(
134                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
135                    axis=0,
136                )
137
138        self.input_dates = ts.compute_input_dates(self.df_)
139
140        n, p = X.shape
141        self.n_obs_ = n
142        rep_1_n = np.repeat(1, n)
143
144        self.y_ = None
145        self.X_ = None
146        self.n_series = p
147        self.fit_objs_.clear()
148        self.y_means_.clear()
149        self.residuals_ = None
150        self.residuals_sims_ = None
151        self.kde_ = None
152        self.sims_ = None
153        self.scaled_Z_ = None
154        self.centered_y_is_ = []
155
156        # Create training inputs
157        mts_input = ts.create_train_inputs(X[::-1], self.lags)
158        self.y_ = mts_input[0]
159        self.X_ = mts_input[1]
160
161        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
162        self.scaled_Z_ = scaled_Z
163
164        if self.verbose > 0:
165            print(
166                f"\n Adjusting {type(self.obj).__name__} to multivariate time series (vectorized)... \n"
167            )
168
169        # VECTORIZED FITTING - NO LOOP
170        y_means_array = np.array(
171            [np.mean(self.y_[:, i]) for i in range(self.init_n_series_)]
172        )
173        for i in range(self.init_n_series_):
174            self.y_means_[i] = y_means_array[i]
175
176        centered_y_all = self.y_ - y_means_array[np.newaxis, :]
177        self.centered_y_is_ = [
178            centered_y_all[:, i] for i in range(self.init_n_series_)
179        ]
180
181        # Single vectorized fit for all series
182        self.obj.fit(scaled_Z, centered_y_all)
183
184        # All series share the same model
185        for i in range(self.init_n_series_):
186            self.fit_objs_[i] = self.obj
187
188        # Vectorized residuals - ONLY target columns (n_obs, n_series)
189        preds_all = self.obj.predict(scaled_Z)
190        residuals_raw = centered_y_all - preds_all
191
192        # CRITICAL: Ensure residuals only have n_series columns, not all scaled_Z columns
193        # In case there's some dimension mismatch, explicitly slice
194        self.residuals_ = residuals_raw[:, : self.init_n_series_]
195
196        # Handle type_pi
197        if self.type_pi == "gaussian":
198            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)
199
200        if self.type_pi.startswith("scp2"):
201            data_mean = np.mean(self.residuals_, axis=0)
202            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
203            self.residuals_ = (
204                self.residuals_ - data_mean[np.newaxis, :]
205            ) / self.residuals_std_dev_[np.newaxis, :]
206
207        if self.replications is not None and "kde" in self.type_pi:
208            if self.verbose > 0:
209                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
210            assert self.kernel in (
211                "gaussian",
212                "tophat",
213            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
214            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
215            grid = GridSearchCV(
216                KernelDensity(kernel=self.kernel, **kwargs),
217                param_grid=kernel_bandwidths,
218            )
219            grid.fit(self.residuals_)
220            if self.verbose > 0:
221                print(
222                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
223                )
224            self.kde_ = grid.best_estimator_
225
226        return self

Fit with vectorized multi-output model - requires n_series >= 2

def predict(self, h=5, level=95, quantiles=None, **kwargs):
228    def predict(self, h=5, level=95, quantiles=None, **kwargs):
229        """Override predict to handle vectorized model predictions"""
230
231        # Delegate to parent for quantiles and multiple levels
232        if quantiles is not None or isinstance(level, (list, np.ndarray)):
233            return super().predict(
234                h=h, level=level, quantiles=quantiles, **kwargs
235            )
236
237        # Store original obj temporarily
238        original_obj = self.obj
239
240        # Create wrapper that extracts the i-th output for each series
241        class VectorizedWrapper:
242            def __init__(self, model, series_idx):
243                self.model = model
244                self.series_idx = series_idx
245
246            def predict(self, X, **kw):
247                """Predict and return only the output for this series index"""
248                preds = self.model.predict(X, **kw)
249                # preds shape: (n_samples, n_series) or (n_series,)
250                if len(preds.shape) == 1:
251                    # Single prediction: (n_series,)
252                    return preds[self.series_idx: self.series_idx + 1]
253                else:
254                    # Multiple predictions: (n_samples, n_series)
255                    return preds[
256                        :, self.series_idx: self.series_idx + 1
257                    ].flatten()
258
259        # Wrap each series with its own index
260        for i in range(self.init_n_series_):
261            self.fit_objs_[i] = VectorizedWrapper(original_obj, i)
262
263        try:
264            result = super().predict(
265                h=h, level=level, quantiles=quantiles, **kwargs
266            )
267        finally:
268            # Restore original
269            for i in range(self.init_n_series_):
270                self.fit_objs_[i] = original_obj
271
272        return result

Override predict to handle vectorized model predictions

class MultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 16class MultitaskClassifier(Base, ClassifierMixin):
 17    """Multitask Classification model based on regression models, with shared covariates
 18
 19    Parameters:
 20
 21        obj: object
 22            any object (must be a regression model) containing a method fit (obj.fit())
 23            and a method predict (obj.predict())
 24
 25        n_hidden_features: int
 26            number of nodes in the hidden layer
 27
 28        activation_name: str
 29            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 30
 31        a: float
 32            hyperparameter for 'prelu' or 'elu' activation function
 33
 34        nodes_sim: str
 35            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 36            'uniform'
 37
 38        bias: boolean
 39            indicates if the hidden layer contains a bias term (True) or not
 40            (False)
 41
 42        dropout: float
 43            regularization parameter; (random) percentage of nodes dropped out
 44            of the training
 45
 46        direct_link: boolean
 47            indicates if the original predictors are included (True) in model's
 48            fitting or not (False)
 49
 50        n_clusters: int
 51            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 52                no clustering)
 53
 54        cluster_encode: bool
 55            defines how the variable containing clusters is treated (default is one-hot)
 56            if `False`, then labels are used, without one-hot encoding
 57
 58        type_clust: str
 59            type of clustering method: currently k-means ('kmeans') or Gaussian
 60            Mixture Model ('gmm')
 61
 62        type_scaling: a tuple of 3 strings
 63            scaling methods for inputs, hidden layer, and clustering respectively
 64            (and when relevant).
 65            Currently available: standardization ('std') or MinMax scaling ('minmax')
 66
 67        col_sample: float
 68            percentage of covariates randomly chosen for training
 69
 70        row_sample: float
 71            percentage of rows chosen for training, by stratified bootstrapping
 72
 73        seed: int
 74            reproducibility seed for nodes_sim=='uniform'
 75
 76        backend: str
 77            "cpu" or "gpu" or "tpu"
 78
 79    Attributes:
 80
 81        fit_objs_: dict
 82            objects adjusted to each individual time series
 83
 84        n_classes_: int
 85            number of classes for the classifier
 86
 87    Examples:
 88
 89    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)
 90
 91    ```python
 92    import nnetsauce as ns
 93    import numpy as np
 94    from sklearn.datasets import load_breast_cancer
 95    from sklearn.linear_model import LinearRegression
 96    from sklearn.model_selection import train_test_split
 97    from sklearn import metrics
 98    from time import time
 99
100    breast_cancer = load_breast_cancer()
101    Z = breast_cancer.data
102    t = breast_cancer.target
103
104    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
105                                                        random_state=123+2*10)
106
107    # Linear Regression is used
108    regr = LinearRegression()
109    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
110                                n_clusters=2, type_clust="gmm")
111
112    start = time()
113    fit_obj.fit(X_train, y_train)
114    print(f"Elapsed {time() - start}")
115
116    print(fit_obj.score(X_test, y_test))
117    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
118
119    start = time()
120    preds = fit_obj.predict(X_test)
121    print(f"Elapsed {time() - start}")
122    print(metrics.classification_report(preds, y_test))
123    ```
124
125    """
126
127    # construct the object -----
128    _estimator_type = "classifier"
129
130    def __init__(
131        self,
132        obj,
133        n_hidden_features=5,
134        activation_name="relu",
135        a=0.01,
136        nodes_sim="sobol",
137        bias=True,
138        dropout=0,
139        direct_link=True,
140        n_clusters=2,
141        cluster_encode=True,
142        type_clust="kmeans",
143        type_scaling=("std", "std", "std"),
144        col_sample=1,
145        row_sample=1,
146        seed=123,
147        backend="cpu",
148    ):
149        super().__init__(
150            n_hidden_features=n_hidden_features,
151            activation_name=activation_name,
152            a=a,
153            nodes_sim=nodes_sim,
154            bias=bias,
155            dropout=dropout,
156            direct_link=direct_link,
157            n_clusters=n_clusters,
158            cluster_encode=cluster_encode,
159            type_clust=type_clust,
160            type_scaling=type_scaling,
161            col_sample=col_sample,
162            row_sample=row_sample,
163            seed=seed,
164            backend=backend,
165        )
166
167        self.type_fit = "classification"
168        self.obj = obj
169        self.fit_objs_ = {}
170
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(
205                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
206            )
207
208        self.classes_ = np.unique(y)
209        return self
210
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
229
230    def predict_proba(self, X, **kwargs):
231        """Predict probabilities for test data X.
232
233        Args:
234
235            X: {array-like}, shape = [n_samples, n_features]
236                Training vectors, where n_samples is the number
237                of samples and n_features is the number of features.
238
239            **kwargs: additional parameters to be passed to
240                    self.cook_test_set
241
242        Returns:
243
244            probability estimates for test data: {array-like}
245
246        """
247
248        shape_X = X.shape
249
250        probs = np.zeros((shape_X[0], self.n_classes_))
251
252        if len(shape_X) == 1:
253            n_features = shape_X[0]
254
255            new_X = mo.rbind(
256                X.reshape(1, n_features),
257                np.ones(n_features).reshape(1, n_features),
258            )
259
260            Z = self.cook_test_set(new_X, **kwargs)
261
262            # loop on all the classes
263            for i in range(self.n_classes_):
264                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
265
266        else:
267            Z = self.cook_test_set(X, **kwargs)
268
269            # loop on all the classes
270            for i in range(self.n_classes_):
271                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
272
273        expit_raw_probs = expit(probs)
274
275        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
276
277    def decision_function(self, X, **kwargs):
278        """Compute the decision function of X.
279
280        Parameters:
281            X: {array-like}, shape = [n_samples, n_features]
282                Samples to compute decision function for.
283
284            **kwargs: additional parameters to be passed to
285                    self.cook_test_set
286
287        Returns:
288            array-like of shape (n_samples,) or (n_samples, n_classes)
289            Decision function of the input samples. The order of outputs is the same
290            as that of the classes passed to fit.
291        """
292        if not hasattr(self.obj, "decision_function"):
293            # If base classifier doesn't have decision_function, use predict_proba
294            proba = self.predict_proba(X, **kwargs)
295            if proba.shape[1] == 2:
296                return proba[:, 1]  # For binary classification
297            return proba  # For multiclass
298
299        if len(X.shape) == 1:
300            n_features = X.shape[0]
301            new_X = mo.rbind(
302                X.reshape(1, n_features),
303                np.ones(n_features).reshape(1, n_features),
304            )
305
306            return (
307                self.obj.decision_function(
308                    self.cook_test_set(new_X, **kwargs), **kwargs
309                )
310            )[0]
311
312        return self.obj.decision_function(
313            self.cook_test_set(X, **kwargs), **kwargs
314        )
315
316    @property
317    def _estimator_type(self):
318        return "classifier"

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

fit_objs_: dict
    fitted regression models, one per class (keyed by class index)

n_classes_: int
    number of classes for the classifier

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                            n_clusters=2, type_clust="gmm")

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
171    def fit(self, X, y, sample_weight=None, **kwargs):
172        """Fit MultitaskClassifier to training data (X, y).
173
174        Args:
175
176            X: {array-like}, shape = [n_samples, n_features]
177                Training vectors, where n_samples is the number
178                of samples and n_features is the number of features.
179
180            y: array-like, shape = [n_samples]
181                Target values.
182
183            **kwargs: additional parameters to be passed to
184                    self.cook_training_set or self.obj.fit
185
186        Returns:
187
188            self: object
189
190        """
191
192        assert mx.is_factor(y), "y must contain only integers"
193
194        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
195
196        self.classes_ = np.unique(y)  # for compatibility with sklearn
197        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
198
199        # multitask response
200        Y = mo.one_hot_encode2(output_y, self.n_classes_)
201
202        # if sample_weight is None:
203        for i in range(self.n_classes_):
204            self.fit_objs_[i] = deepcopy(
205                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
206            )
207
208        self.classes_ = np.unique(y)
209        return self

Fit MultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
230    def predict_proba(self, X, **kwargs):
231        """Predict probabilities for test data X.
232
233        Args:
234
235            X: {array-like}, shape = [n_samples, n_features]
236                Training vectors, where n_samples is the number
237                of samples and n_features is the number of features.
238
239            **kwargs: additional parameters to be passed to
240                    self.cook_test_set
241
242        Returns:
243
244            probability estimates for test data: {array-like}
245
246        """
247
248        shape_X = X.shape
249
250        probs = np.zeros((shape_X[0], self.n_classes_))
251
252        if len(shape_X) == 1:
253            n_features = shape_X[0]
254
255            new_X = mo.rbind(
256                X.reshape(1, n_features),
257                np.ones(n_features).reshape(1, n_features),
258            )
259
260            Z = self.cook_test_set(new_X, **kwargs)
261
262            # loop on all the classes
263            for i in range(self.n_classes_):
264                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
265
266        else:
267            Z = self.cook_test_set(X, **kwargs)
268
269            # loop on all the classes
270            for i in range(self.n_classes_):
271                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
272
273        expit_raw_probs = expit(probs)
274
275        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
class NeuralNetRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
class NeuralNetRegressor(BaseEstimator, RegressorMixin):
    """
    (Pretrained) Neural Network Regressor.

    Inputs are standardized and the target is centred before training.
    Training applies `max_iter` plain gradient-descent updates, starting
    either from user-supplied `weights` or from freshly initialized
    parameters.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.
            (100,) is used when neither `hidden_layer_sizes` nor
            `weights` is given.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.
        weights : list, default=None
            The weights to initialize the model with, as a list of
            (weight matrix, bias) pairs, one per layer.

    Attributes:

        weights : list
            The weights of the model (overwritten by `fit` with the
            trained parameters).
        params : list
            The parameters of the model.
        scaler_ : sklearn.preprocessing.StandardScaler
            The scaler used to standardize the input features.
        y_mean_ : float
            The mean of the target variable.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        get_weights()
            Get the weights of the model.
        set_weights(weights)
            Set the weights of the model.
    """

    def __init__(
        self,
        hidden_layer_sizes=None,
        max_iter=100,
        learning_rate=0.01,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0,
        weights=None,
        random_state=None,
    ):
        if not JAX_AVAILABLE:
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
            )

        if weights is None and hidden_layer_sizes is None:
            hidden_layer_sizes = (100,)  # default value if neither is provided
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.weights = weights
        self.random_state = random_state
        self.params = None
        self.scaler_ = StandardScaler()
        self.y_mean_ = None

    def _validate_weights(self, input_dim):
        """Validate that weights dimensions are coherent.

        Returns False when `self.weights` is empty or None, True when
        every layer chains correctly from `input_dim` down to a single
        output.

        Raises:
            ValueError: on a dimension mismatch, or when the entries of
                `self.weights` lack the expected attributes.
        """
        if not self.weights:
            return False

        try:
            # Check each layer's weights and biases
            prev_dim = input_dim
            for W, b in self.weights:
                # Check weight matrix dimensions
                if W.shape[0] != prev_dim:
                    raise ValueError(
                        f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}"
                    )
                # Check bias dimension matches weight matrix output
                if W.shape[1] != b.shape[0]:
                    raise ValueError(
                        f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}"
                    )
                prev_dim = W.shape[1]

            # Check final output dimension is 1 for regression
            if prev_dim != 1:
                raise ValueError(
                    f"Final layer output dimension {prev_dim} must be 1 for regression"
                )

            return True
        except (AttributeError, IndexError) as err:
            # keep the original error as the cause for easier debugging
            raise ValueError(
                "Weights format is invalid. Expected list of (weight, bias) tuples"
            ) from err

    def fit(self, X, y):
        """Fit the model to the data.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors.
            y: array with a `reshape` method, shape = [n_samples]
                Target values.

        Returns:

            self: object

        Raises:
            ValueError: if no usable `weights` are available and
                `hidden_layer_sizes` is None, or if `weights` are
                dimensionally inconsistent with X.
        """
        # Standardize the input features
        X = self.scaler_.fit_transform(X)
        # Ensure y is 2D for consistency
        y = y.reshape(-1, 1)
        # The network is trained on the centred target; the mean is added
        # back in `predict`
        self.y_mean_ = jnp.mean(y)
        y = y - self.y_mean_
        # Validate or initialize weights. Empty weights (validation returns
        # False) fall through to a fresh initialization instead of leaving
        # `self.params` as None.
        if self.weights is not None and self._validate_weights(X.shape[1]):
            self.params = self.weights
        else:
            if self.hidden_layer_sizes is None:
                raise ValueError(
                    "Either weights or hidden_layer_sizes must be provided"
                )
            self.params = initialize_params(
                X.shape[1], self.hidden_layer_sizes, self.random_state
            )
        # NOTE(review): `activation_name` and `dropout` are not passed to
        # `loss` here, only to `predict_internal` in `predict` -- confirm
        # that `loss` uses the intended activation.
        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
        perex_grads = jit(
            vmap(grad_loss, in_axes=(None, 0, 0))
        )  # fast per-example grads
        # Training loop
        for _ in range(self.max_iter):
            grads = perex_grads(self.params, X, y)
            # Average gradients across examples.
            # `jax.tree_util.tree_map` replaces the deprecated `jax.tree_map`.
            grads = jax.tree_util.tree_map(
                lambda g: jnp.mean(g, axis=0), grads
            )
            # Update parameters
            self.params = [
                (W - self.learning_rate * dW, b - self.learning_rate * db)
                for (W, b), (dW, db) in zip(self.params, grads)
            ]
        # Store final weights (a later call to `fit` starts from them)
        self.weights = self.params
        return self

    def get_weights(self):
        """Return the current weights of the model.

        Raises:
            ValueError: if `self.weights` is None.
        """
        if self.weights is None:
            raise ValueError(
                "No weights available. Model has not been fitted yet."
            )
        return self.weights

    def set_weights(self, weights):
        """Set the weights of the model manually.

        Both `self.weights` and `self.params` are replaced; no validation
        is performed here.
        """
        self.weights = weights
        self.params = weights

    def predict(self, X):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to predict.

        Returns:

            1-D array of predictions, with the training target mean
            added back.

        Raises:
            ValueError: if the model has no parameters yet.
        """
        # Check for parameters first so that an unfitted model reports this
        # error rather than a failure inside the scaler
        if self.params is None:
            raise ValueError("Model has not been fitted yet.")
        X = self.scaler_.transform(X)
        predictions = predict_internal(
            self.params,
            X,
            activation_func=self.activation_name,
            dropout=self.dropout,
            seed=self.random_state,
        )
        return predictions.reshape(-1) + self.y_mean_

(Pretrained) Neural Network Regressor.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
def fit(self, X, y):
225    def fit(self, X, y):
226        # Standardize the input features
227        X = self.scaler_.fit_transform(X)
228        # Ensure y is 2D for consistency
229        y = y.reshape(-1, 1)
230        self.y_mean_ = jnp.mean(y)
231        y = y - self.y_mean_
232        # Validate or initialize weights
233        if self.weights is not None:
234            if self._validate_weights(X.shape[1]):
235                self.params = self.weights
236        else:
237            if self.hidden_layer_sizes is None:
238                raise ValueError(
239                    "Either weights or hidden_layer_sizes must be provided"
240                )
241            self.params = initialize_params(
242                X.shape[1], self.hidden_layer_sizes, self.random_state
243            )
244        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
245        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
246        perex_grads = jit(
247            vmap(grad_loss, in_axes=(None, 0, 0))
248        )  # fast per-example grads
249        # Training loop
250        for _ in range(self.max_iter):
251            grads = perex_grads(self.params, X, y)
252            # Average gradients across examples
253            grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads)
254            # Update parameters
255            self.params = [
256                (W - self.learning_rate * dW, b - self.learning_rate * db)
257                for (W, b), (dW, db) in zip(self.params, grads)
258            ]
259        # Store final weights
260        self.weights = self.params
261        return self
def predict(self, X):
276    def predict(self, X):
277        X = self.scaler_.transform(X)
278        if self.params is None:
279            raise ValueError("Model has not been fitted yet.")
280        predictions = predict_internal(
281            self.params,
282            X,
283            activation_func=self.activation_name,
284            dropout=self.dropout,
285            seed=self.random_state,
286        )
287        return predictions.reshape(-1) + self.y_mean_
class NeuralNetClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 10class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
 11    """
 12    (Pretrained) Neural Network Classifier.
 13
 14    Parameters:
 15
 16        hidden_layer_sizes : tuple, default=(100,)
 17            The number of neurons in each hidden layer.
 18        max_iter : int, default=100
 19            The maximum number of iterations to train the model.
 20        learning_rate : float, default=0.01
 21            The learning rate for the optimizer.
 22        l1_ratio : float, default=0.5
 23            The ratio of L1 regularization.
 24        alpha : float, default=1e-6
 25            The regularization parameter.
 26        activation_name : str, default="relu"
 27            The activation function to use.
 28        dropout : float, default=0.0
 29            The dropout rate.
 30        random_state : int, default=None
 31            The random state for the random number generator.
 32        weights : list, default=None
 33            The weights to initialize the model with.
 34
 35    Attributes:
 36
 37        weights : list
 38            The weights of the model.
 39        params : list
 40            The parameters of the model.
 41        scaler_ : sklearn.preprocessing.StandardScaler
 42            The scaler used to standardize the input features.
 43        y_mean_ : float
 44            The mean of the target variable.
 45
 46    Methods:
 47
 48        fit(X, y)
 49            Fit the model to the data.
 50        predict(X)
 51            Predict the target variable.
 52        predict_proba(X)
 53            Predict the probability of the target variable.
 54        get_weights()
 55            Get the weights of the model.
 56        set_weights(weights)
 57            Set the weights of the model.
 58    """
 59
 60    _estimator_type = "classifier"
 61
 62    def __init__(
 63        self,
 64        hidden_layer_sizes=(100,),
 65        max_iter=100,
 66        learning_rate=0.01,
 67        weights=None,
 68        l1_ratio=0.5,
 69        alpha=1e-6,
 70        activation_name="relu",
 71        dropout=0.0,
 72        random_state=None,
 73    ):
 74        self.hidden_layer_sizes = hidden_layer_sizes
 75        self.max_iter = max_iter
 76        self.learning_rate = learning_rate
 77        self.weights = weights
 78        self.l1_ratio = l1_ratio
 79        self.alpha = alpha
 80        self.activation_name = activation_name
 81        self.dropout = dropout
 82        self.random_state = random_state
 83        self.regr = None
 84
 85    def fit(self, X, y):
 86        """Fit the model to the data.
 87
 88        Parameters:
 89
 90            X: {array-like}, shape = [n_samples, n_features]
 91                Training vectors, where n_samples is the number of samples and
 92                n_features is the number of features.
 93            y: array-like, shape = [n_samples]
 94                Target values.
 95        """
 96        regressor = NeuralNetRegressor(
 97            hidden_layer_sizes=self.hidden_layer_sizes,
 98            max_iter=self.max_iter,
 99            learning_rate=self.learning_rate,
100            weights=self.weights,
101            l1_ratio=self.l1_ratio,
102            alpha=self.alpha,
103            activation_name=self.activation_name,
104            dropout=self.dropout,
105            random_state=self.random_state,
106        )
107        self.regr = SimpleMultitaskClassifier(regressor)
108        self.regr.fit(X, y)
109        self.classes_ = np.unique(y)
110        self.n_classes_ = len(self.classes_)
111        self.n_tasks_ = 1
112        self.n_features_in_ = X.shape[1]
113        self.n_outputs_ = 1
114        self.n_samples_fit_ = X.shape[0]
115        self.n_samples_test_ = X.shape[0]
116        self.n_features_out_ = 1
117        self.n_outputs_ = 1
118        self.n_features_in_ = X.shape[1]
119        self.n_features_out_ = 1
120        self.n_outputs_ = 1
121        return self
122
123    def predict_proba(self, X):
124        """Predict the probability of the target variable.
125
126        Parameters:
127
128            X: {array-like}, shape = [n_samples, n_features]
129                Training vectors, where n_samples is the number of samples and
130                n_features is the number of features.
131        """
132        return self.regr.predict_proba(X)
133
134    def predict(self, X):
135        """Predict the target variable.
136
137        Parameters:
138
139            X: {array-like}, shape = [n_samples, n_features]
140                Training vectors, where n_samples is the number of samples and
141                n_features is the number of features.
142        """
143        return self.regr.predict(X)
144
145    @property
146    def _estimator_type(self):
147        return "classifier"

(Pretrained) Neural Network Classifier.

Parameters:

hidden_layer_sizes : tuple, default=(100,)
    The number of neurons in each hidden layer.
max_iter : int, default=100
    The maximum number of iterations to train the model.
learning_rate : float, default=0.01
    The learning rate for the optimizer.
l1_ratio : float, default=0.5
    The ratio of L1 regularization.
alpha : float, default=1e-6
    The regularization parameter.
activation_name : str, default="relu"
    The activation function to use.
dropout : float, default=0.0
    The dropout rate.
random_state : int, default=None
    The random state for the random number generator.
weights : list, default=None
    The weights to initialize the model with.

Attributes:

weights : list
    The weights of the model.
params : list
    The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
    The scaler used to standardize the input features.
y_mean_ : float
    The mean of the target variable.

Methods:

fit(X, y)
    Fit the model to the data.
predict(X)
    Predict the target variable.
predict_proba(X)
    Predict the probability of the target variable.
get_weights()
    Get the weights of the model.
set_weights(weights)
    Set the weights of the model.
def fit(self, X, y):
 85    def fit(self, X, y):
 86        """Fit the model to the data.
 87
 88        Parameters:
 89
 90            X: {array-like}, shape = [n_samples, n_features]
 91                Training vectors, where n_samples is the number of samples and
 92                n_features is the number of features.
 93            y: array-like, shape = [n_samples]
 94                Target values.
 95        """
 96        regressor = NeuralNetRegressor(
 97            hidden_layer_sizes=self.hidden_layer_sizes,
 98            max_iter=self.max_iter,
 99            learning_rate=self.learning_rate,
100            weights=self.weights,
101            l1_ratio=self.l1_ratio,
102            alpha=self.alpha,
103            activation_name=self.activation_name,
104            dropout=self.dropout,
105            random_state=self.random_state,
106        )
107        self.regr = SimpleMultitaskClassifier(regressor)
108        self.regr.fit(X, y)
109        self.classes_ = np.unique(y)
110        self.n_classes_ = len(self.classes_)
111        self.n_tasks_ = 1
112        self.n_features_in_ = X.shape[1]
113        self.n_outputs_ = 1
114        self.n_samples_fit_ = X.shape[0]
115        self.n_samples_test_ = X.shape[0]
116        self.n_features_out_ = 1
117        self.n_outputs_ = 1
118        self.n_features_in_ = X.shape[1]
119        self.n_features_out_ = 1
120        self.n_outputs_ = 1
121        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict_proba(self, X):
123    def predict_proba(self, X):
124        """Predict the probability of the target variable.
125
126        Parameters:
127
128            X: {array-like}, shape = [n_samples, n_features]
129                Training vectors, where n_samples is the number of samples and
130                n_features is the number of features.
131        """
132        return self.regr.predict_proba(X)

Predict the probability of the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Input vectors to score, where n_samples is the number of samples and
    n_features is the number of features.
def predict(self, X):
134    def predict(self, X):
135        """Predict the target variable.
136
137        Parameters:
138
139            X: {array-like}, shape = [n_samples, n_features]
140                Training vectors, where n_samples is the number of samples and
141                n_features is the number of features.
142        """
143        return self.regr.predict(X)

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Input vectors to predict on, where n_samples is the number of samples and
    n_features is the number of features.
class PredictionInterval(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 21class PredictionInterval(BaseEstimator, RegressorMixin):
 22    """Class PredictionInterval: Obtain prediction intervals.
 23
 24    Attributes:
 25
 26        obj: an object;
 27            fitted object containing methods `fit` and `predict`
 28
 29        method: a string;
 30            method for constructing the prediction intervals.
 31            Currently "splitconformal" (default) and "localconformal"
 32
 33        level: a float;
 34            Confidence level for prediction intervals. Default is 95,
 35            equivalent to a miscoverage error of 5 (%)
 36
 37        replications: an integer;
 38            Number of replications for simulated conformal (default is `None`)
 39
 40        type_pi: a string;
 41            type of prediction interval: currently `None`
 42            (split conformal without simulation)
 43            for type_pi in:
 44                - 'bootstrap': Bootstrap resampling.
 45                - 'kde': Kernel Density Estimation.
 46
 47        type_split: a string;
 48            "random" (random split of data) or "sequential" (sequential split of data)
 49
 50        seed: an integer;
 51            Reproducibility of fit (there's a random split between fitting and calibration data)
 52    """
 53
 54    def __init__(
 55        self,
 56        obj,
 57        method="splitconformal",
 58        level=95,
 59        type_pi=None,
 60        type_split="random",
 61        replications=None,
 62        kernel=None,
 63        agg="mean",
 64        seed=123,
 65    ):
 66        self.obj = obj
 67        self.method = method
 68        self.level = level
 69        self.type_pi = type_pi
 70        self.type_split = type_split
 71        self.replications = replications
 72        self.kernel = kernel
 73        self.agg = agg
 74        self.seed = seed
 75        self.alpha_ = 1 - self.level / 100
 76        self.quantile_ = None
 77        self.icp_ = None
 78        self.calibrated_residuals_ = None
 79        self.scaled_calibrated_residuals_ = None
 80        self.calibrated_residuals_scaler_ = None
 81        self.kde_ = None
 82        self.aic_ = None
 83        self.aicc_ = None
 84        self.bic_ = None
 85        self.sse_ = None
 86
 87    def fit(self, X, y, sample_weight=None, **kwargs):
 88        """Fit the `method` to training data (X, y).
 89
 90        Args:
 91
 92            X: array-like, shape = [n_samples, n_features];
 93                Training set vectors, where n_samples is the number
 94                of samples and n_features is the number of features.
 95
 96            y: array-like, shape = [n_samples, ]; Target values.
 97
 98            sample_weight: array-like, shape = [n_samples]
 99                Sample weights.
100
101        """
102
103        if self.type_split == "random":
104            X_train, X_calibration, y_train, y_calibration = train_test_split(
105                X, y, test_size=0.5, random_state=self.seed
106            )
107
108        elif self.type_split == "sequential":
109            n_x = X.shape[0]
110            n_x_half = n_x // 2
111            first_half_idx = range(0, n_x_half)
112            second_half_idx = range(n_x_half, n_x)
113            X_train = X[first_half_idx, :]
114            X_calibration = X[second_half_idx, :]
115            y_train = y[first_half_idx]
116            y_calibration = y[second_half_idx]
117
118        if self.method == "splitconformal":
119            self.obj.fit(X_train, y_train)
120            preds_calibration = self.obj.predict(X_calibration)
121            self.calibrated_residuals_ = y_calibration - preds_calibration
122            absolute_residuals = np.abs(self.calibrated_residuals_)
123            self.calibrated_residuals_scaler_ = StandardScaler(
124                with_mean=True, with_std=True
125            )
126            self.scaled_calibrated_residuals_ = (
127                self.calibrated_residuals_scaler_.fit_transform(
128                    self.calibrated_residuals_.reshape(-1, 1)
129                ).ravel()
130            )
131            try:
132                # numpy version >= 1.22
133                self.quantile_ = np.quantile(
134                    a=absolute_residuals, q=self.level / 100, method="higher"
135                )
136            except Exception:
137                # numpy version < 1.22
138                self.quantile_ = np.quantile(
139                    a=absolute_residuals,
140                    q=self.level / 100,
141                    interpolation="higher",
142                )
143
144        if self.method == "localconformal":
145            mad_estimator = ExtraTreesRegressor()
146            normalizer = RegressorNormalizer(
147                self.obj, mad_estimator, AbsErrorErrFunc()
148            )
149            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
150            self.icp_ = IcpRegressor(nc)
151            self.icp_.fit(X_train, y_train)
152            self.icp_.calibrate(X_calibration, y_calibration)
153
154            # FIX: Store calibration residuals from the ICP scorer so that
155            # simulation-based prediction intervals are available in predict().
156            raw_residuals = self.icp_.nc_function.err_func.apply(
157                self.icp_.nc_function.predict(X_calibration), y_calibration
158            )
159            self.calibrated_residuals_ = raw_residuals
160            self.calibrated_residuals_scaler_ = StandardScaler(
161                with_mean=True, with_std=True
162            )
163            self.scaled_calibrated_residuals_ = (
164                self.calibrated_residuals_scaler_.fit_transform(
165                    self.calibrated_residuals_.reshape(-1, 1)
166                ).ravel()
167            )
168
169        # Calculate AIC
170        # Get predictions
171        preds = self.obj.predict(X_calibration)
172
173        # Calculate SSE
174        self.sse_ = np.sum((y_calibration - preds) ** 2)
175
176        # Get number of parameters from the base model
177        n_params = (
178            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
179        )
180
181        # Calculate AIC
182        n_samples = len(y_calibration)
183        temp = n_samples * np.log(self.sse_ / n_samples)
184        self.aic_ = temp + 2 * n_params
185        self.bic_ = temp + np.log(n_samples) * n_params
186
187        return self
188
189    def _simulate_from_residuals(self, pred, n_obs):
190        """Shared helper: draw `self.replications` simulations from calibrated
191        residuals and return (sims, mean, lower, upper).
192
193        Args:
194            pred: 1-D array of point predictions, shape [n_obs].
195            n_obs: int, number of test observations.
196
197        Returns:
198            sims_   : 2-D array, shape [n_obs, replications]
199            mean_   : 1-D array, shape [n_obs]
200            lower_  : 1-D array, shape [n_obs]
201            upper_  : 1-D array, shape [n_obs]
202        """
203        type_pi = self.type_pi if self.type_pi is not None else "kde"
204        replications = (
205            self.replications if self.replications is not None else 100
206        )
207
208        assert type_pi in (
209            "bootstrap",
210            "kde",
211            "normal",
212            "ecdf",
213            "permutation",
214            "smooth-bootstrap",
215        ), (
216            "`type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', "
217            "'permutation', 'smooth-bootstrap')"
218        )
219
220        scale = self.calibrated_residuals_scaler_.scale_[0]
221
222        if type_pi == "bootstrap":
223            np.random.seed(self.seed)
224            residuals_sims = np.asarray(
225                [
226                    np.random.choice(
227                        a=self.scaled_calibrated_residuals_,
228                        size=n_obs,
229                    )
230                    for _ in range(replications)
231                ]
232            ).T  # shape [n_obs, replications]
233
234        elif type_pi == "kde":
235            kde = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
236            residuals_sims = np.asarray(
237                [
238                    kde.resample(size=n_obs, seed=self.seed + i).ravel()
239                    for i in range(replications)
240                ]
241            ).T  # shape [n_obs, replications]
242
243        else:  # normal / ecdf / permutation / smooth-bootstrap
244            residuals_sims = np.asarray(
245                simulate_replications(
246                    data=self.scaled_calibrated_residuals_,
247                    method=type_pi,
248                    num_replications=replications,
249                    n_obs=n_obs,
250                    seed=self.seed,
251                )
252            ).T  # shape [n_obs, replications]
253
254        sims = np.asarray(
255            [
256                pred + scale * residuals_sims[:, i].ravel()
257                for i in range(replications)
258            ]
259        ).T  # shape [n_obs, replications]
260
261        mean_ = np.mean(sims, axis=1)
262        lower_ = np.quantile(sims, q=self.alpha_ / 200, axis=1)
263        upper_ = np.quantile(sims, q=1 - self.alpha_ / 200, axis=1)
264
265        return sims, mean_, lower_, upper_
266
267    def predict(self, X, return_pi=False):
268        """Obtain predictions and prediction intervals
269
270        Args:
271
272            X: array-like, shape = [n_samples, n_features];
273                Testing set vectors, where n_samples is the number
274                of samples and n_features is the number of features.
275
276            return_pi: boolean
277                Whether the prediction interval is returned or not.
278                Default is False, for compatibility with other _estimators_.
279                If True, a tuple containing the predictions + lower and upper
280                bounds is returned.
281
282        """
283
284        if self.method == "splitconformal":
285            pred = self.obj.predict(X)
286
287        if self.method == "localconformal":
288            pred = self.icp_.predict(X)
289
290        # ------------------------------------------------------------------ #
291        # splitconformal
292        # ------------------------------------------------------------------ #
293        if self.method == "splitconformal":
294            if self.replications is None and self.type_pi is None:
295                # Plain split-conformal: symmetric quantile band
296                if return_pi:
297                    DescribeResult = namedtuple(
298                        "DescribeResult", ("mean", "lower", "upper")
299                    )
300                    return DescribeResult(
301                        pred, pred - self.quantile_, pred + self.quantile_
302                    )
303                else:
304                    return pred
305
306            else:
307                # FIX: simulation-based prediction intervals for splitconformal.
308                # Previously this branch raised NotImplementedError even though
309                # all the necessary logic was present — it was simply unreachable
310                # because the raise fired unconditionally.  The code has been
311                # moved into _simulate_from_residuals() and called here.
312
313                if self.type_pi is None:
314                    warnings.warn(
315                        "type_pi must be set when replications is not None; "
316                        "defaulting to 'kde'."
317                    )
318                if self.replications is None:
319                    warnings.warn(
320                        "replications must be set when type_pi is not None; "
321                        "defaulting to 100."
322                    )
323
324                (
325                    self.sims_,
326                    self.mean_,
327                    self.lower_,
328                    self.upper_,
329                ) = self._simulate_from_residuals(pred, X.shape[0])
330
331                DescribeResult = namedtuple(
332                    "DescribeResult", ("mean", "sims", "lower", "upper")
333                )
334                return DescribeResult(
335                    self.mean_, self.sims_, self.lower_, self.upper_
336                )
337
338        # ------------------------------------------------------------------ #
339        # localconformal
340        # ------------------------------------------------------------------ #
341        if self.method == "localconformal":
342            if self.replications is None:
343                if return_pi:
344                    predictions_bounds = self.icp_.predict(
345                        X, significance=1 - self.level
346                    )
347                    DescribeResult = namedtuple(
348                        "DescribeResult", ("mean", "lower", "upper")
349                    )
350                    return DescribeResult(
351                        pred,
352                        predictions_bounds[:, 0],
353                        predictions_bounds[:, 1],
354                    )
355                else:
356                    return pred
357
358            else:
359                # FIX: simulation-based prediction intervals for localconformal.
360                # Previously this always raised NotImplementedError.  Now we
361                # reuse the calibration residuals stored during fit() and apply
362                # the same simulation logic used by splitconformal via the
363                # shared helper _simulate_from_residuals().
364
365                if self.type_pi is None:
366                    warnings.warn(
367                        "type_pi must be set when replications is not None; "
368                        "defaulting to 'kde'."
369                    )
370
371                (
372                    self.sims_,
373                    self.mean_,
374                    self.lower_,
375                    self.upper_,
376                ) = self._simulate_from_residuals(pred, X.shape[0])
377
378                DescribeResult = namedtuple(
379                    "DescribeResult", ("mean", "sims", "lower", "upper")
380                )
381                return DescribeResult(
382                    self.mean_, self.sims_, self.lower_, self.upper_
383                )

Class PredictionInterval: Obtain prediction intervals.

Attributes:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction intervals.
    Currently "splitconformal" (default) and "localconformal"

level: a float;
    Confidence level for prediction intervals. Default is 95,
    equivalent to a miscoverage error of 5 (%)

replications: an integer;
    Number of replications for simulated conformal (default is `None`)

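type_pi: a string;
    type of simulation used to build the prediction interval. The default,
    `None` (with `replications=None`), means split conformal without
    simulation. Otherwise, one of:
        - 'bootstrap': Bootstrap resampling.
        - 'kde': Kernel Density Estimation (used when `replications` is set
          but `type_pi` is `None`).
        - 'normal', 'ecdf', 'permutation', 'smooth-bootstrap'.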

type_split: a string;
    "random" (random split of data) or "sequential" (sequential split of data)

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
 87    def fit(self, X, y, sample_weight=None, **kwargs):
 88        """Fit the `method` to training data (X, y).
 89
 90        Args:
 91
 92            X: array-like, shape = [n_samples, n_features];
 93                Training set vectors, where n_samples is the number
 94                of samples and n_features is the number of features.
 95
 96            y: array-like, shape = [n_samples, ]; Target values.
 97
 98            sample_weight: array-like, shape = [n_samples]
 99                Sample weights.
100
101        """
102
103        if self.type_split == "random":
104            X_train, X_calibration, y_train, y_calibration = train_test_split(
105                X, y, test_size=0.5, random_state=self.seed
106            )
107
108        elif self.type_split == "sequential":
109            n_x = X.shape[0]
110            n_x_half = n_x // 2
111            first_half_idx = range(0, n_x_half)
112            second_half_idx = range(n_x_half, n_x)
113            X_train = X[first_half_idx, :]
114            X_calibration = X[second_half_idx, :]
115            y_train = y[first_half_idx]
116            y_calibration = y[second_half_idx]
117
118        if self.method == "splitconformal":
119            self.obj.fit(X_train, y_train)
120            preds_calibration = self.obj.predict(X_calibration)
121            self.calibrated_residuals_ = y_calibration - preds_calibration
122            absolute_residuals = np.abs(self.calibrated_residuals_)
123            self.calibrated_residuals_scaler_ = StandardScaler(
124                with_mean=True, with_std=True
125            )
126            self.scaled_calibrated_residuals_ = (
127                self.calibrated_residuals_scaler_.fit_transform(
128                    self.calibrated_residuals_.reshape(-1, 1)
129                ).ravel()
130            )
131            try:
132                # numpy version >= 1.22
133                self.quantile_ = np.quantile(
134                    a=absolute_residuals, q=self.level / 100, method="higher"
135                )
136            except Exception:
137                # numpy version < 1.22
138                self.quantile_ = np.quantile(
139                    a=absolute_residuals,
140                    q=self.level / 100,
141                    interpolation="higher",
142                )
143
144        if self.method == "localconformal":
145            mad_estimator = ExtraTreesRegressor()
146            normalizer = RegressorNormalizer(
147                self.obj, mad_estimator, AbsErrorErrFunc()
148            )
149            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
150            self.icp_ = IcpRegressor(nc)
151            self.icp_.fit(X_train, y_train)
152            self.icp_.calibrate(X_calibration, y_calibration)
153
154            # FIX: Store calibration residuals from the ICP scorer so that
155            # simulation-based prediction intervals are available in predict().
156            raw_residuals = self.icp_.nc_function.err_func.apply(
157                self.icp_.nc_function.predict(X_calibration), y_calibration
158            )
159            self.calibrated_residuals_ = raw_residuals
160            self.calibrated_residuals_scaler_ = StandardScaler(
161                with_mean=True, with_std=True
162            )
163            self.scaled_calibrated_residuals_ = (
164                self.calibrated_residuals_scaler_.fit_transform(
165                    self.calibrated_residuals_.reshape(-1, 1)
166                ).ravel()
167            )
168
169        # Calculate AIC
170        # Get predictions
171        preds = self.obj.predict(X_calibration)
172
173        # Calculate SSE
174        self.sse_ = np.sum((y_calibration - preds) ** 2)
175
176        # Get number of parameters from the base model
177        n_params = (
178            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
179        )
180
181        # Calculate AIC
182        n_samples = len(y_calibration)
183        temp = n_samples * np.log(self.sse_ / n_samples)
184        self.aic_ = temp + 2 * n_params
185        self.bic_ = temp + np.log(n_samples) * n_params
186
187        return self

Fit the method to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
def predict(self, X, return_pi=False):
267    def predict(self, X, return_pi=False):
268        """Obtain predictions and prediction intervals
269
270        Args:
271
272            X: array-like, shape = [n_samples, n_features];
273                Testing set vectors, where n_samples is the number
274                of samples and n_features is the number of features.
275
276            return_pi: boolean
277                Whether the prediction interval is returned or not.
278                Default is False, for compatibility with other _estimators_.
279                If True, a tuple containing the predictions + lower and upper
280                bounds is returned.
281
282        """
283
284        if self.method == "splitconformal":
285            pred = self.obj.predict(X)
286
287        if self.method == "localconformal":
288            pred = self.icp_.predict(X)
289
290        # ------------------------------------------------------------------ #
291        # splitconformal
292        # ------------------------------------------------------------------ #
293        if self.method == "splitconformal":
294            if self.replications is None and self.type_pi is None:
295                # Plain split-conformal: symmetric quantile band
296                if return_pi:
297                    DescribeResult = namedtuple(
298                        "DescribeResult", ("mean", "lower", "upper")
299                    )
300                    return DescribeResult(
301                        pred, pred - self.quantile_, pred + self.quantile_
302                    )
303                else:
304                    return pred
305
306            else:
307                # FIX: simulation-based prediction intervals for splitconformal.
308                # Previously this branch raised NotImplementedError even though
309                # all the necessary logic was present — it was simply unreachable
310                # because the raise fired unconditionally.  The code has been
311                # moved into _simulate_from_residuals() and called here.
312
313                if self.type_pi is None:
314                    warnings.warn(
315                        "type_pi must be set when replications is not None; "
316                        "defaulting to 'kde'."
317                    )
318                if self.replications is None:
319                    warnings.warn(
320                        "replications must be set when type_pi is not None; "
321                        "defaulting to 100."
322                    )
323
324                (
325                    self.sims_,
326                    self.mean_,
327                    self.lower_,
328                    self.upper_,
329                ) = self._simulate_from_residuals(pred, X.shape[0])
330
331                DescribeResult = namedtuple(
332                    "DescribeResult", ("mean", "sims", "lower", "upper")
333                )
334                return DescribeResult(
335                    self.mean_, self.sims_, self.lower_, self.upper_
336                )
337
338        # ------------------------------------------------------------------ #
339        # localconformal
340        # ------------------------------------------------------------------ #
341        if self.method == "localconformal":
342            if self.replications is None:
343                if return_pi:
344                    predictions_bounds = self.icp_.predict(
345                        X, significance=1 - self.level
346                    )
347                    DescribeResult = namedtuple(
348                        "DescribeResult", ("mean", "lower", "upper")
349                    )
350                    return DescribeResult(
351                        pred,
352                        predictions_bounds[:, 0],
353                        predictions_bounds[:, 1],
354                    )
355                else:
356                    return pred
357
358            else:
359                # FIX: simulation-based prediction intervals for localconformal.
360                # Previously this always raised NotImplementedError.  Now we
361                # reuse the calibration residuals stored during fit() and apply
362                # the same simulation logic used by splitconformal via the
363                # shared helper _simulate_from_residuals().
364
365                if self.type_pi is None:
366                    warnings.warn(
367                        "type_pi must be set when replications is not None; "
368                        "defaulting to 'kde'."
369                    )
370
371                (
372                    self.sims_,
373                    self.mean_,
374                    self.lower_,
375                    self.upper_,
376                ) = self._simulate_from_residuals(pred, X.shape[0])
377
378                DescribeResult = namedtuple(
379                    "DescribeResult", ("mean", "sims", "lower", "upper")
380                )
381                return DescribeResult(
382                    self.mean_, self.sims_, self.lower_, self.upper_
383                )

Obtain predictions and prediction intervals

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.

return_pi: boolean
    Whether the prediction interval is returned or not.
    Default is False, for compatibility with other _estimators_.
    If True, a tuple containing the predictions + lower and upper
    bounds is returned.
class PredictionSet(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 20class PredictionSet(BaseEstimator, ClassifierMixin):
 21    """Class PredictionSet: Obtain prediction sets.
 22
 23    Attributes:
 24
 25        obj: an object;
 26            fitted object containing methods `fit` and `predict`
 27
 28        method: a string;
 29            method for constructing the prediction sets.
 30            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
 31
 32        level: a float;
 33            Confidence level for prediction sets. Default is None,
 34            95 is equivalent to a miscoverage error of 5 (%)
 35
 36        seed: an integer;
 37            Reproducibility of fit (there's a random split between fitting and calibration data)
 38    """
 39
 40    def __init__(
 41        self,
 42        obj,
 43        method="icp",
 44        level=None,
 45        seed=123,
 46    ):
 47        self.obj = obj
 48        self.method = method
 49        self.level = level
 50        self.seed = seed
 51        if self.level is not None:
 52            self.alpha_ = 1 - self.level / 100
 53        self.quantile_ = None
 54        self.icp_ = None
 55        self.tcp_ = None
 56
 57        if self.method == "icp":
 58            self.icp_ = IcpClassifier(
 59                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
 60            )
 61        elif self.method == "tcp":
 62            self.tcp_ = TcpClassifier(
 63                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
 64            )
 65        else:
 66            raise ValueError("`self.method` must be in ('icp', 'tcp')")
 67
 68    def fit(self, X, y, sample_weight=None, **kwargs):
 69        """Fit the `method` to training data (X, y).
 70
 71        Args:
 72
 73            X: array-like, shape = [n_samples, n_features];
 74                Training set vectors, where n_samples is the number
 75                of samples and n_features is the number of features.
 76
 77            y: array-like, shape = [n_samples, ]; Target values.
 78
 79            sample_weight: array-like, shape = [n_samples]
 80                Sample weights.
 81
 82        """
 83        if self.method == "icp":
 84            X_train, X_calibration, y_train, y_calibration = train_test_split(
 85                X, y, test_size=0.5, random_state=self.seed
 86            )
 87            self.icp_.fit(X_train, y_train)
 88            self.icp_.calibrate(X_calibration, y_calibration)
 89
 90        elif self.method == "tcp":
 91            self.tcp_.fit(X, y)
 92
 93        return self
 94
 95    def predict(self, X, **kwargs):
 96        """Obtain predictions and prediction sets
 97
 98        Args:
 99
100            X: array-like, shape = [n_samples, n_features];
101                Testing set vectors, where n_samples is the number
102                of samples and n_features is the number of features.
103
104        """
105
106        if self.method == "icp":
107            return self.icp_.predict(X, significance=self.alpha_, **kwargs)
108
109        elif self.method == "tcp":
110            return self.tcp_.predict(X, significance=self.alpha_, **kwargs)
111
112        else:
113            raise ValueError("`self.method` must be in ('icp', 'tcp')")
114
115    def predict_proba(self, X):
116        predictions = self.predict(X)
117        return np.eye(len(np.unique(predictions)))[predictions]

Class PredictionSet: Obtain prediction sets.

Attributes:

obj: an object;
    fitted object containing methods `fit` and `predict`

method: a string;
    method for constructing the prediction sets.
    Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)

level: a float;
    Confidence level for prediction sets, in percent. Default is None;
    for example, 95 is equivalent to a miscoverage error of 5 (%)

seed: an integer;
    Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
68    def fit(self, X, y, sample_weight=None, **kwargs):
69        """Fit the `method` to training data (X, y).
70
71        Args:
72
73            X: array-like, shape = [n_samples, n_features];
74                Training set vectors, where n_samples is the number
75                of samples and n_features is the number of features.
76
77            y: array-like, shape = [n_samples, ]; Target values.
78
79            sample_weight: array-like, shape = [n_samples]
80                Sample weights.
81
82        """
83        if self.method == "icp":
84            X_train, X_calibration, y_train, y_calibration = train_test_split(
85                X, y, test_size=0.5, random_state=self.seed
86            )
87            self.icp_.fit(X_train, y_train)
88            self.icp_.calibrate(X_calibration, y_calibration)
89
90        elif self.method == "tcp":
91            self.tcp_.fit(X, y)
92
93        return self

Fit the method to training data (X, y).

Args:

X: array-like, shape = [n_samples, n_features];
    Training set vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples, ]; Target values.

sample_weight: array-like, shape = [n_samples]
    Sample weights.
def predict(self, X, **kwargs):
 95    def predict(self, X, **kwargs):
 96        """Obtain predictions and prediction sets
 97
 98        Args:
 99
100            X: array-like, shape = [n_samples, n_features];
101                Testing set vectors, where n_samples is the number
102                of samples and n_features is the number of features.
103
104        """
105
106        if self.method == "icp":
107            return self.icp_.predict(X, significance=self.alpha_, **kwargs)
108
109        elif self.method == "tcp":
110            return self.tcp_.predict(X, significance=self.alpha_, **kwargs)
111
112        else:
113            raise ValueError("`self.method` must be in ('icp', 'tcp')")

Obtain predictions and prediction sets

Args:

X: array-like, shape = [n_samples, n_features];
    Testing set vectors, where n_samples is the number
    of samples and n_features is the number of features.
def predict_proba(self, X):
115    def predict_proba(self, X):
116        predictions = self.predict(X)
117        return np.eye(len(np.unique(predictions)))[predictions]
class SimpleMultitaskClassifier(nnetsauce.Base, sklearn.base.ClassifierMixin):
 19class SimpleMultitaskClassifier(Base, ClassifierMixin):
 20    """Multitask Classification model based on regression models, with shared covariates
 21
 22    Parameters:
 23
 24        obj: object
 25            any object (must be a regression model) containing a method fit (obj.fit())
 26            and a method predict (obj.predict())
 27
 28        seed: int
 29            reproducibility seed
 30
 31    Attributes:
 32
 33        fit_objs_: dict
 34            objects adjusted to each individual time series
 35
 36        n_classes_: int
 37            number of classes for the classifier
 38
 39    Examples:
 40
 41    ```python
 42    import nnetsauce as ns
 43    import numpy as np
 44    from sklearn.datasets import load_breast_cancer
 45    from sklearn.linear_model import LinearRegression
 46    from sklearn.model_selection import train_test_split
 47    from sklearn import metrics
 48    from time import time
 49
 50    breast_cancer = load_breast_cancer()
 51    Z = breast_cancer.data
 52    t = breast_cancer.target
 53
 54    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
 55                                                        random_state=123+2*10)
 56
 57    # Linear Regression is used
 58    regr = LinearRegression()
 59    fit_obj = ns.SimpleMultitaskClassifier(regr)
 60
 61    start = time()
 62    fit_obj.fit(X_train, y_train)
 63    print(f"Elapsed {time() - start}")
 64
 65    print(fit_obj.score(X_test, y_test))
 66    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
 67
 68    start = time()
 69    preds = fit_obj.predict(X_test)
 70    print(f"Elapsed {time() - start}")
 71    print(metrics.classification_report(preds, y_test))
 72    ```
 73
 74    """
 75
 76    # construct the object -----
 77    _estimator_type = "classifier"
 78
 79    def __init__(
 80        self,
 81        obj,
 82    ):
 83        self.type_fit = "classification"
 84        self.obj = obj
 85        self.fit_objs_ = {}
 86        self.multioutput_model_ = None
 87        self.X_scaler_ = StandardScaler()
 88        self.scaled_X_ = None
 89
 90    def fit(self, X, y, sample_weight=None, **kwargs):
 91        """Fit SimpleMultitaskClassifier to training data (X, y).
 92
 93        Args:
 94
 95            X: {array-like}, shape = [n_samples, n_features]
 96                Training vectors, where n_samples is the number
 97                of samples and n_features is the number of features.
 98
 99            y: array-like, shape = [n_samples]
100                Target values.
101
102            **kwargs: additional parameters to be passed to
103                    self.cook_training_set or self.obj.fit
104
105        Returns:
106
107            self: object
108
109        """
110
111        assert mx.is_factor(y), "y must contain only integers"
112
113        self.classes_ = np.unique(y)  # for compatibility with sklearn
114        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
115
116        self.scaled_X_ = self.X_scaler_.fit_transform(X)
117
118        # multitask response
119        Y = mo.one_hot_encode2(y, self.n_classes_)
120
121        # Try MultiOutputRegressor first (more efficient)
122        try:
123            self.multioutput_model_ = MultiOutputRegressor(deepcopy(self.obj))
124            try:
125                self.multioutput_model_.fit(
126                    self.scaled_X_, Y, sample_weight=sample_weight, **kwargs
127                )
128            except TypeError:
129                # If sample_weight not supported, try without it
130                self.multioutput_model_.fit(self.scaled_X_, Y, **kwargs)
131        except Exception:
132            # Fallback: fit separate models for each class
133            self.multioutput_model_ = None
134            try:
135                for i in range(self.n_classes_):
136                    self.fit_objs_[i] = deepcopy(
137                        self.obj.fit(
138                            self.scaled_X_,
139                            Y[:, i],
140                            sample_weight=sample_weight,
141                            **kwargs
142                        )
143                    )
144            except TypeError:
145                for i in range(self.n_classes_):
146                    self.fit_objs_[i] = deepcopy(
147                        self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
148                    )
149        return self
150
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Args:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Training vectors, where n_samples is the number
158                of samples and n_features is the number of features.
159
160            **kwargs: additional parameters
161
162        Returns:
163
164            model predictions: {array-like}
165
166        """
167        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
168
169    def predict_proba(self, X, **kwargs):
170        """Predict probabilities for test data X.
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            **kwargs: additional parameters
179
180        Returns:
181
182            probability estimates for test data: {array-like}
183
184        """
185
186        shape_X = X.shape
187
188        if self.multioutput_model_ is not None:
189            # Use MultiOutputRegressor for prediction
190            if len(shape_X) == 1:  # one example
191                n_features = shape_X[0]
192                new_X = mo.rbind(
193                    X.reshape(1, n_features),
194                    np.ones(n_features).reshape(1, n_features),
195                )
196                Z = self.X_scaler_.transform(new_X, **kwargs)
197                probs = self.multioutput_model_.predict(Z, **kwargs)[:1, :]
198            else:  # multiple rows
199                Z = self.X_scaler_.transform(X, **kwargs)
200                probs = self.multioutput_model_.predict(Z, **kwargs)
201        else:
202            # Use separate models for each class
203            probs = np.zeros((shape_X[0], self.n_classes_))
204
205            if len(shape_X) == 1:  # one example
206                n_features = shape_X[0]
207
208                new_X = mo.rbind(
209                    X.reshape(1, n_features),
210                    np.ones(n_features).reshape(1, n_features),
211                )
212
213                Z = self.X_scaler_.transform(new_X, **kwargs)
214
215                # Fallback to standard model
216                for i in range(self.n_classes_):
217                    probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
218
219            else:  # multiple rows
220                Z = self.X_scaler_.transform(X, **kwargs)
221
222                # Fallback to standard model
223                for i in range(self.n_classes_):
224                    probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
225
226        expit_raw_probs = expit(probs)
227
228        # Add small epsilon to avoid division by zero
229        row_sums = expit_raw_probs.sum(axis=1)[:, None]
230        row_sums[row_sums < 1e-10] = 1e-10
231
232        return expit_raw_probs / row_sums
233
234    def decision_function(self, X, **kwargs):
235        """Compute the decision function of X.
236
237        Parameters:
238            X: {array-like}, shape = [n_samples, n_features]
239                Samples to compute decision function for.
240
241            **kwargs: additional parameters to be passed to
242                    self.cook_test_set
243
244        Returns:
245            array-like of shape (n_samples,) or (n_samples, n_classes)
246            Decision function of the input samples. The order of outputs is the same
247            as that of the classes passed to fit.
248        """
249        if not hasattr(self.obj, "decision_function"):
250            # If base classifier doesn't have decision_function, use predict_proba
251            proba = self.predict_proba(X, **kwargs)
252            if proba.shape[1] == 2:
253                return proba[:, 1]  # For binary classification
254            return proba  # For multiclass
255
256        if len(X.shape) == 1:
257            n_features = X.shape[0]
258            new_X = mo.rbind(
259                X.reshape(1, n_features),
260                np.ones(n_features).reshape(1, n_features),
261            )
262
263            return (
264                self.obj.decision_function(
265                    self.cook_test_set(new_X, **kwargs), **kwargs
266                )
267            )[0]
268
269        return self.obj.decision_function(
270            self.cook_test_set(X, **kwargs), **kwargs
271        )
272
273    @property
274    def _estimator_type(self):
275        return "classifier"

Multitask Classification model based on regression models, with shared covariates

Parameters:

obj: object
    any object (must be a regression model) containing a method fit (obj.fit())
    and a method predict (obj.predict())

seed: int
    reproducibility seed (documented here, but not a parameter of
    the current `__init__(self, obj)`)

Attributes:

fit_objs_: dict
    regressors fitted to each class's one-hot response column, keyed by
    class index (filled only when the per-class fallback fit is used)

n_classes_: int
    number of classes for the classifier

Examples:

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target

X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                    random_state=123+2*10)

# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
 90    def fit(self, X, y, sample_weight=None, **kwargs):
 91        """Fit SimpleMultitaskClassifier to training data (X, y).
 92
 93        Args:
 94
 95            X: {array-like}, shape = [n_samples, n_features]
 96                Training vectors, where n_samples is the number
 97                of samples and n_features is the number of features.
 98
 99            y: array-like, shape = [n_samples]
100                Target values.
101
102            **kwargs: additional parameters to be passed to
103                    self.cook_training_set or self.obj.fit
104
105        Returns:
106
107            self: object
108
109        """
110
111        assert mx.is_factor(y), "y must contain only integers"
112
113        self.classes_ = np.unique(y)  # for compatibility with sklearn
114        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
115
116        self.scaled_X_ = self.X_scaler_.fit_transform(X)
117
118        # multitask response
119        Y = mo.one_hot_encode2(y, self.n_classes_)
120
121        # Try MultiOutputRegressor first (more efficient)
122        try:
123            self.multioutput_model_ = MultiOutputRegressor(deepcopy(self.obj))
124            try:
125                self.multioutput_model_.fit(
126                    self.scaled_X_, Y, sample_weight=sample_weight, **kwargs
127                )
128            except TypeError:
129                # If sample_weight not supported, try without it
130                self.multioutput_model_.fit(self.scaled_X_, Y, **kwargs)
131        except Exception:
132            # Fallback: fit separate models for each class
133            self.multioutput_model_ = None
134            try:
135                for i in range(self.n_classes_):
136                    self.fit_objs_[i] = deepcopy(
137                        self.obj.fit(
138                            self.scaled_X_,
139                            Y[:, i],
140                            sample_weight=sample_weight,
141                            **kwargs
142                        )
143                    )
144            except TypeError:
145                for i in range(self.n_classes_):
146                    self.fit_objs_[i] = deepcopy(
147                        self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
148                    )
149        return self

Fit SimpleMultitaskClassifier to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
151    def predict(self, X, **kwargs):
152        """Predict test data X.
153
154        Args:
155
156            X: {array-like}, shape = [n_samples, n_features]
157                Training vectors, where n_samples is the number
158                of samples and n_features is the number of features.
159
160            **kwargs: additional parameters
161
162        Returns:
163
164            model predictions: {array-like}
165
166        """
167        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
169    def predict_proba(self, X, **kwargs):
170        """Predict probabilities for test data X.
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            **kwargs: additional parameters
179
180        Returns:
181
182            probability estimates for test data: {array-like}
183
184        """
185
186        shape_X = X.shape
187
188        if self.multioutput_model_ is not None:
189            # Use MultiOutputRegressor for prediction
190            if len(shape_X) == 1:  # one example
191                n_features = shape_X[0]
192                new_X = mo.rbind(
193                    X.reshape(1, n_features),
194                    np.ones(n_features).reshape(1, n_features),
195                )
196                Z = self.X_scaler_.transform(new_X, **kwargs)
197                probs = self.multioutput_model_.predict(Z, **kwargs)[:1, :]
198            else:  # multiple rows
199                Z = self.X_scaler_.transform(X, **kwargs)
200                probs = self.multioutput_model_.predict(Z, **kwargs)
201        else:
202            # Use separate models for each class
203            probs = np.zeros((shape_X[0], self.n_classes_))
204
205            if len(shape_X) == 1:  # one example
206                n_features = shape_X[0]
207
208                new_X = mo.rbind(
209                    X.reshape(1, n_features),
210                    np.ones(n_features).reshape(1, n_features),
211                )
212
213                Z = self.X_scaler_.transform(new_X, **kwargs)
214
215                # Fallback to standard model
216                for i in range(self.n_classes_):
217                    probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]
218
219            else:  # multiple rows
220                Z = self.X_scaler_.transform(X, **kwargs)
221
222                # Fallback to standard model
223                for i in range(self.n_classes_):
224                    probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)
225
226        expit_raw_probs = expit(probs)
227
228        # Add small epsilon to avoid division by zero
229        row_sums = expit_raw_probs.sum(axis=1)[:, None]
230        row_sums[row_sums < 1e-10] = 1e-10
231
232        return expit_raw_probs / row_sums

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters

Returns:

probability estimates for test data: {array-like}
class Optimizer:
  9class Optimizer:
 10    """Optimizer class
 11
 12    Attributes:
 13
 14        type_optim: str
 15            type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
 16            or 'scd' (stochastic minibatch coordinate descent)
 17
 18        num_iters: int
 19            number of iterations of the optimizer
 20
 21        learning_rate: float
 22            step size
 23
 24        batch_prop: float
 25            proportion of the initial data used at each optimization step
 26
 27        learning_method: str
 28            "poly" - learning rate decreasing as a polynomial function
 29            of # of iterations (default)
 30            "exp" - learning rate decreasing as an exponential function
 31            of # of iterations
 32            "momentum" - gradient descent using momentum
 33
 34        randomization: str
 35            type of randomization applied at each step
 36            "strat" - stratified subsampling (default)
 37            "shuffle" - random subsampling
 38
 39        mass: float
 40            mass on velocity, for `method` == "momentum"
 41
 42        decay: float
 43            coefficient of decrease of the learning rate for
 44            `method` == "poly" and `method` == "exp"
 45
 46        tolerance: float
 47            early stopping parameter (convergence of loss function)
 48
 49        verbose: int
 50            controls verbosity of gradient descent
 51            0 - nothing is printed
 52            1 - a progress bar is printed
 53            2 - successive loss function values are printed
 54
 55    """
 56
 57    # construct the object -----
 58
 59    def __init__(
 60        self,
 61        type_optim="sgd",
 62        num_iters=100,
 63        learning_rate=0.01,
 64        batch_prop=1.0,
 65        learning_method="momentum",
 66        randomization="strat",
 67        mass=0.9,
 68        decay=0.1,
 69        tolerance=1e-3,
 70        verbose=1,
 71    ):
 72        self.type_optim = type_optim
 73        self.num_iters = num_iters
 74        self.learning_rate = learning_rate
 75        self.batch_prop = batch_prop
 76        self.learning_method = learning_method
 77        self.randomization = randomization
 78        self.mass = mass
 79        self.decay = decay
 80        self.tolerance = tolerance
 81        self.verbose = verbose
 82        self.opt = None
 83
 84    def fit(self, loss_func, response, x0, q=None, **kwargs):
 85        """Fit GLM model to training data (X, y).
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92            target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self
141
142    def one_hot_encode(self, y, n_classes):
143        return one_hot_encode(y, n_classes)

Optimizer class

Attributes:

type_optim: str
    type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
    or 'scd' (stochastic minibatch coordinate descent)

num_iters: int
    number of iterations of the optimizer

learning_rate: float
    step size

batch_prop: float
    proportion of the initial data used at each optimization step

learning_method: str
    "poly" - learning rate decreasing as a polynomial function
    of # of iterations
    "exp" - learning rate decreasing as an exponential function
    of # of iterations
    "momentum" - gradient descent using momentum (default)

randomization: str
    type of randomization applied at each step
    "strat" - stratified subsampling (default)
    "shuffle" - random subsampling

mass: float
    mass on velocity, for `learning_method` == "momentum"

decay: float
    coefficient of decrease of the learning rate for
    `learning_method` == "poly" and `learning_method` == "exp"

tolerance: float
    early stopping parameter (convergence of loss function)

verbose: int
    controls verbosity of gradient descent
    0 - nothing is printed
    1 - a progress bar is printed
    2 - successive loss function values are printed
def fit(self, loss_func, response, x0, q=None, **kwargs):
 84    def fit(self, loss_func, response, x0, q=None, **kwargs):
 85        """Fit GLM model to training data (X, y).
 86
 87        Args:
 88
 89            loss_func: loss function
 90
 91            response: array-like, shape = [n_samples]
 92            target variable (used for subsampling)
 93
 94            x0: array-like, shape = [n_features]
 95                initial value provided to the optimizer
 96
 97            **kwargs: additional parameters to be passed to
 98                    loss function
 99
100        Returns:
101
102            self: object
103
104        """
105
106        if self.type_optim == "scd":
107            self.results = scd(
108                loss_func,
109                response=response,
110                x=x0,
111                num_iters=self.num_iters,
112                batch_prop=self.batch_prop,
113                learning_rate=self.learning_rate,
114                learning_method=self.learning_method,
115                mass=self.mass,
116                decay=self.decay,
117                randomization=self.randomization,
118                tolerance=self.tolerance,
119                verbose=self.verbose,
120                **kwargs
121            )
122
123        if self.type_optim == "sgd":
124            self.results = sgd(
125                loss_func,
126                response=response,
127                x=x0,
128                num_iters=self.num_iters,
129                batch_prop=self.batch_prop,
130                learning_rate=self.learning_rate,
131                learning_method=self.learning_method,
132                mass=self.mass,
133                decay=self.decay,
134                randomization=self.randomization,
135                tolerance=self.tolerance,
136                verbose=self.verbose,
137                **kwargs
138            )
139
140        return self

Minimize `loss_func` with the configured optimizer ('sgd' or 'scd'), starting from `x0`; the outcome is stored in `self.results`.

Args:

loss_func: loss function

response: array-like, shape = [n_samples]
    target variable (used for subsampling)

x0: array-like, shape = [n_features]
    initial value provided to the optimizer

**kwargs: additional parameters to be passed to
        loss function

Returns:

self: object
class QuantileRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 37class QuantileRegressor(BaseEstimator, RegressorMixin):
 38    """
 39    Quantile Regressor.
 40
 41    Parameters:
 42
 43        obj: base model (regression model)
 44            The base regressor from which to build a
 45            quantile regressor.
 46
 47        level: int, default=95
 48            The level of the quantiles to compute.
 49
 50        scoring: str, default="predictions"
 51            The scoring to use for the optimization and constructing
 52            prediction intervals (predictions, residuals, conformal,
 53              studentized, conformal-studentized).
 54
 55    Attributes:
 56
 57        obj_ : base model (regression model)
 58            The base regressor from which to build a
 59            quantile regressor.
 60
 61        offset_multipliers_ : list
 62            The multipliers for the offset.
 63
 64        scoring_residuals_ : list
 65            The residuals for the scoring.
 66
 67        student_multiplier_ : float
 68            The multiplier for the student.
 69
 70    """
 71
 72    def __init__(self, obj, level=95, scoring="predictions"):
 73        assert scoring in (
 74            "predictions",
 75            "residuals",
 76            "conformal",
 77            "studentized",
 78            "conformal-studentized",
 79        ), "scoring must be 'predictions' or 'residuals' or 'conformal' or 'studentized' or 'conformal-studentized'"
 80        self.obj = obj
 81        self.level = level
 82        low_risk_level = (1 - level / 100) / 2
 83        self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level]
 84        self.scoring = scoring
 85        self.offset_multipliers_ = None
 86        self.obj_ = None
 87        self.scoring_residuals_ = None
 88        self.student_multiplier_ = None
 89
 90    def _compute_quantile_loss(self, residuals, quantile):
 91        """
 92        Compute the quantile loss for a given set of residuals and quantile.
 93        """
 94        return np.mean(
 95            residuals
 96            * (quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0))
 97        )
 98
 99    def _optimize_multiplier(
100        self,
101        y,
102        base_predictions,
103        prev_predictions,
104        scoring_residuals=None,
105        quantile=0.5,
106    ):
107        """
108        Optimize the multiplier for a given quantile.
109        """
110        if not 0 < quantile < 1:
111            raise ValueError("Quantile should be between 0 and 1.")
112
113        n = len(y)
114
115        def objective(log_multiplier):
116            """
117            Objective function for optimization.
118            """
119            # Convert to positive multiplier using exp
120            multiplier = np.exp(log_multiplier[0])
121            if self.scoring == "predictions":
122                assert (
123                    base_predictions is not None
124                ), "base_predictions must be not None"
125                # Calculate predictions
126                if prev_predictions is None:
127                    # For first quantile, subtract from conditional expectation
128                    predictions = base_predictions - multiplier * np.abs(
129                        base_predictions
130                    )
131                else:
132                    # For other quantiles, add to previous quantile
133                    offset = multiplier * np.abs(base_predictions)
134                    predictions = prev_predictions + offset
135            elif self.scoring in ("residuals", "conformal"):
136                assert (
137                    scoring_residuals is not None
138                ), "scoring_residuals must be not None"
139                # print("scoring_residuals", scoring_residuals)
140                # Calculate predictions
141                if prev_predictions is None:
142                    # For first quantile, subtract from conditional expectation
143                    predictions = base_predictions - multiplier * np.std(
144                        scoring_residuals
145                    ) / np.sqrt(len(scoring_residuals))
146                    # print("predictions", predictions)
147                else:
148                    # For other quantiles, add to previous quantile
149                    offset = (
150                        multiplier
151                        * np.std(scoring_residuals)
152                        / np.sqrt(len(scoring_residuals))
153                    )
154                    predictions = prev_predictions + offset
155            elif self.scoring in ("studentized", "conformal-studentized"):
156                assert (
157                    scoring_residuals is not None
158                ), "scoring_residuals must be not None"
159                # Calculate predictions
160                if prev_predictions is None:
161                    # For first quantile, subtract from conditional expectation
162                    predictions = (
163                        base_predictions - multiplier * self.student_multiplier_
164                    )
165                    # print("predictions", predictions)
166                else:
167                    # For other quantiles, add to previous quantile
168                    offset = multiplier * self.student_multiplier_
169                    predictions = prev_predictions + offset
170            else:
171                raise ValueError("Invalid argument 'scoring'")
172
173            return self._compute_quantile_loss(y - predictions, quantile)
174
175        # Optimize in log space for numerical stability
176        # bounds = [(-10, 10)]  # log space bounds
177        bounds = [(-100, 100)]  # log space bounds
178        result = differential_evolution(
179            objective,
180            bounds,
181            # popsize=15,
182            # maxiter=100,
183            # tol=1e-4,
184            popsize=25,
185            maxiter=200,
186            tol=1e-6,
187            disp=False,
188        )
189
190        return np.exp(result.x[0])
191
192    def fit(self, X, y):
193        """Fit the model to the data.
194
195        Parameters:
196
197            X: {array-like}, shape = [n_samples, n_features]
198                Training vectors, where n_samples is the number of samples and
199                n_features is the number of features.
200            y: array-like, shape = [n_samples]
201                Target values.
202        """
203        self.obj_ = clone(self.obj)
204
205        if self.scoring in ("predictions", "residuals"):
206            self.obj_.fit(X, y)
207            base_predictions = self.obj_.predict(X)
208            scoring_residuals = y - base_predictions
209            self.scoring_residuals_ = scoring_residuals
210
211        elif self.scoring == "conformal":
212            X_train, X_calib, y_train, y_calib = train_test_split(
213                X, y, test_size=0.5, random_state=42
214            )
215            self.obj_.fit(X_train, y_train)
216            scoring_residuals = y_calib - self.obj_.predict(
217                X_calib
218            )  # These are calibration predictions
219            self.scoring_residuals_ = scoring_residuals
220            # Update base_predictions to use training predictions for optimization
221            self.obj_.fit(X_calib, y_calib)
222            base_predictions = self.obj_.predict(X_calib)
223
224        elif self.scoring in ("studentized", "conformal-studentized"):
225            # Calculate student multiplier
226            if self.scoring == "conformal-studentized":
227                X_train, X_calib, y_train, y_calib = train_test_split(
228                    X, y, test_size=0.5, random_state=42
229                )
230                self.obj_.fit(X_train, y_train)
231                scoring_residuals = y_calib - self.obj_.predict(X_calib)
232                # Calculate studentized multiplier using calibration data
233                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
234                    len(y_calib) - 1
235                )
236                self.obj_.fit(X_calib, y_calib)
237                base_predictions = self.obj_.predict(X_calib)
238            else:  # regular studentized
239                self.obj_.fit(X, y)
240                base_predictions = self.obj_.predict(X)
241                scoring_residuals = y - base_predictions
242                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(
243                    len(y) - 1
244                )
245
246        # Initialize storage for multipliers
247        self.offset_multipliers_ = []
248        # Keep track of current predictions for each quantile
249        current_predictions = None
250
251        # Fit each quantile sequentially
252        for i, quantile in enumerate(self.quantiles):
253            if self.scoring == "predictions":
254                multiplier = self._optimize_multiplier(
255                    y=y,
256                    base_predictions=base_predictions,
257                    prev_predictions=current_predictions,
258                    quantile=quantile,
259                )
260
261                self.offset_multipliers_.append(multiplier)
262
263                # Update current predictions
264                if current_predictions is None:
265                    # First quantile (lowest)
266                    current_predictions = (
267                        base_predictions - multiplier * np.abs(base_predictions)
268                    )
269                else:
270                    # Subsequent quantiles
271                    offset = multiplier * np.abs(base_predictions)
272                    current_predictions = current_predictions + offset
273
274            elif self.scoring == "residuals":
275                multiplier = self._optimize_multiplier(
276                    y=y,
277                    base_predictions=base_predictions,
278                    scoring_residuals=scoring_residuals,
279                    prev_predictions=current_predictions,
280                    quantile=quantile,
281                )
282
283                self.offset_multipliers_.append(multiplier)
284
285                # Update current predictions
286                if current_predictions is None:
287                    # First quantile (lowest)
288                    current_predictions = (
289                        base_predictions
290                        - multiplier
291                        * np.std(scoring_residuals)
292                        / np.sqrt(len(scoring_residuals))
293                    )
294                else:
295                    # Subsequent quantiles
296                    offset = (
297                        multiplier
298                        * np.std(scoring_residuals)
299                        / np.sqrt(len(scoring_residuals))
300                    )
301                    current_predictions = current_predictions + offset
302
303            elif self.scoring == "conformal":
304                multiplier = self._optimize_multiplier(
305                    y=y_calib,
306                    base_predictions=base_predictions,
307                    scoring_residuals=scoring_residuals,
308                    prev_predictions=current_predictions,
309                    quantile=quantile,
310                )
311
312                self.offset_multipliers_.append(multiplier)
313
314                # Update current predictions
315                if current_predictions is None:
316                    # First quantile (lowest)
317                    current_predictions = (
318                        base_predictions
319                        - multiplier
320                        * np.std(scoring_residuals)
321                        / np.sqrt(len(scoring_residuals))
322                    )
323                else:
324                    # Subsequent quantiles
325                    offset = (
326                        multiplier
327                        * np.std(scoring_residuals)
328                        / np.sqrt(len(scoring_residuals))
329                    )
330                    current_predictions = current_predictions + offset
331
332            elif self.scoring in ("studentized", "conformal-studentized"):
333                multiplier = self._optimize_multiplier(
334                    y=y_calib if self.scoring == "conformal-studentized" else y,
335                    base_predictions=base_predictions,
336                    scoring_residuals=scoring_residuals,
337                    prev_predictions=current_predictions,
338                    quantile=quantile,
339                )
340
341                self.offset_multipliers_.append(multiplier)
342
343                # Update current predictions
344                if current_predictions is None:
345                    current_predictions = (
346                        base_predictions - multiplier * self.student_multiplier_
347                    )
348                else:
349                    offset = multiplier * self.student_multiplier_
350                    current_predictions = current_predictions + offset
351
352        return self
353
354    def predict(self, X, return_pi=False):
355        """Predict the target variable.
356
357        Parameters:
358
359            X: {array-like}, shape = [n_samples, n_features]
360                Training vectors, where n_samples is the number of samples and
361                n_features is the number of features.
362
363            return_pi: bool, default=True
364                Whether to return the prediction intervals.
365        """
366        if self.obj_ is None or self.offset_multipliers_ is None:
367            raise ValueError("Model not fitted yet.")
368
369        base_predictions = self.obj_.predict(X)
370        all_predictions = []
371
372        if self.scoring == "predictions":
373            # Generate first quantile
374            current_predictions = base_predictions - self.offset_multipliers_[
375                0
376            ] * np.abs(base_predictions)
377            all_predictions.append(current_predictions)
378
379            # Generate remaining quantiles
380            for multiplier in self.offset_multipliers_[1:]:
381                offset = multiplier * np.abs(base_predictions)
382                current_predictions = current_predictions + offset
383                all_predictions.append(current_predictions)
384
385        elif self.scoring in ("residuals", "conformal"):
386            # Generate first quantile
387            current_predictions = base_predictions - self.offset_multipliers_[
388                0
389            ] * np.std(self.scoring_residuals_) / np.sqrt(
390                len(self.scoring_residuals_)
391            )
392            all_predictions.append(current_predictions)
393
394            # Generate remaining quantiles
395            for multiplier in self.offset_multipliers_[1:]:
396                offset = (
397                    multiplier
398                    * np.std(self.scoring_residuals_)
399                    / np.sqrt(len(self.scoring_residuals_))
400                )
401                current_predictions = current_predictions + offset
402                all_predictions.append(current_predictions)
403
404        elif self.scoring in ("studentized", "conformal-studentized"):
405            # Generate first quantile
406            current_predictions = (
407                base_predictions
408                - self.offset_multipliers_[0] * self.student_multiplier_
409            )
410            all_predictions.append(current_predictions)
411
412            # Generate remaining quantiles
413            for multiplier in self.offset_multipliers_[1:]:
414                offset = multiplier * self.student_multiplier_
415                current_predictions = current_predictions + offset
416                all_predictions.append(current_predictions)
417
418        if return_pi == False:
419            return np.asarray(all_predictions[1])
420
421        DescribeResult = namedtuple(
422            "DecribeResult", ["mean", "lower", "upper", "median"]
423        )
424        DescribeResult.mean = base_predictions
425        DescribeResult.lower = np.asarray(all_predictions[0])
426        DescribeResult.median = np.asarray(all_predictions[1])
427        DescribeResult.upper = np.asarray(all_predictions[2])
428
429        return DescribeResult

Quantile Regressor.

Parameters:

obj: base model (regression model)
    The base regressor from which to build a
    quantile regressor.

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (regression model)
    The base regressor from which to build a
    quantile regressor.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

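student_multiplier_ : float
    Scale applied to the multipliers under 'studentized' and 'conformal-studentized' scoring: the sample standard deviation of the targets divided by sqrt(n - 1).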
def fit(self, X, y):
192    def fit(self, X, y):
193        """Fit the model to the data.
194
195        Parameters:
196
197            X: {array-like}, shape = [n_samples, n_features]
198                Training vectors, where n_samples is the number of samples and
199                n_features is the number of features.
200            y: array-like, shape = [n_samples]
201                Target values.
202        """
203        self.obj_ = clone(self.obj)
204
205        if self.scoring in ("predictions", "residuals"):
206            self.obj_.fit(X, y)
207            base_predictions = self.obj_.predict(X)
208            scoring_residuals = y - base_predictions
209            self.scoring_residuals_ = scoring_residuals
210
211        elif self.scoring == "conformal":
212            X_train, X_calib, y_train, y_calib = train_test_split(
213                X, y, test_size=0.5, random_state=42
214            )
215            self.obj_.fit(X_train, y_train)
216            scoring_residuals = y_calib - self.obj_.predict(
217                X_calib
218            )  # These are calibration predictions
219            self.scoring_residuals_ = scoring_residuals
220            # Update base_predictions to use training predictions for optimization
221            self.obj_.fit(X_calib, y_calib)
222            base_predictions = self.obj_.predict(X_calib)
223
224        elif self.scoring in ("studentized", "conformal-studentized"):
225            # Calculate student multiplier
226            if self.scoring == "conformal-studentized":
227                X_train, X_calib, y_train, y_calib = train_test_split(
228                    X, y, test_size=0.5, random_state=42
229                )
230                self.obj_.fit(X_train, y_train)
231                scoring_residuals = y_calib - self.obj_.predict(X_calib)
232                # Calculate studentized multiplier using calibration data
233                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
234                    len(y_calib) - 1
235                )
236                self.obj_.fit(X_calib, y_calib)
237                base_predictions = self.obj_.predict(X_calib)
238            else:  # regular studentized
239                self.obj_.fit(X, y)
240                base_predictions = self.obj_.predict(X)
241                scoring_residuals = y - base_predictions
242                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(
243                    len(y) - 1
244                )
245
246        # Initialize storage for multipliers
247        self.offset_multipliers_ = []
248        # Keep track of current predictions for each quantile
249        current_predictions = None
250
251        # Fit each quantile sequentially
252        for i, quantile in enumerate(self.quantiles):
253            if self.scoring == "predictions":
254                multiplier = self._optimize_multiplier(
255                    y=y,
256                    base_predictions=base_predictions,
257                    prev_predictions=current_predictions,
258                    quantile=quantile,
259                )
260
261                self.offset_multipliers_.append(multiplier)
262
263                # Update current predictions
264                if current_predictions is None:
265                    # First quantile (lowest)
266                    current_predictions = (
267                        base_predictions - multiplier * np.abs(base_predictions)
268                    )
269                else:
270                    # Subsequent quantiles
271                    offset = multiplier * np.abs(base_predictions)
272                    current_predictions = current_predictions + offset
273
274            elif self.scoring == "residuals":
275                multiplier = self._optimize_multiplier(
276                    y=y,
277                    base_predictions=base_predictions,
278                    scoring_residuals=scoring_residuals,
279                    prev_predictions=current_predictions,
280                    quantile=quantile,
281                )
282
283                self.offset_multipliers_.append(multiplier)
284
285                # Update current predictions
286                if current_predictions is None:
287                    # First quantile (lowest)
288                    current_predictions = (
289                        base_predictions
290                        - multiplier
291                        * np.std(scoring_residuals)
292                        / np.sqrt(len(scoring_residuals))
293                    )
294                else:
295                    # Subsequent quantiles
296                    offset = (
297                        multiplier
298                        * np.std(scoring_residuals)
299                        / np.sqrt(len(scoring_residuals))
300                    )
301                    current_predictions = current_predictions + offset
302
303            elif self.scoring == "conformal":
304                multiplier = self._optimize_multiplier(
305                    y=y_calib,
306                    base_predictions=base_predictions,
307                    scoring_residuals=scoring_residuals,
308                    prev_predictions=current_predictions,
309                    quantile=quantile,
310                )
311
312                self.offset_multipliers_.append(multiplier)
313
314                # Update current predictions
315                if current_predictions is None:
316                    # First quantile (lowest)
317                    current_predictions = (
318                        base_predictions
319                        - multiplier
320                        * np.std(scoring_residuals)
321                        / np.sqrt(len(scoring_residuals))
322                    )
323                else:
324                    # Subsequent quantiles
325                    offset = (
326                        multiplier
327                        * np.std(scoring_residuals)
328                        / np.sqrt(len(scoring_residuals))
329                    )
330                    current_predictions = current_predictions + offset
331
332            elif self.scoring in ("studentized", "conformal-studentized"):
333                multiplier = self._optimize_multiplier(
334                    y=y_calib if self.scoring == "conformal-studentized" else y,
335                    base_predictions=base_predictions,
336                    scoring_residuals=scoring_residuals,
337                    prev_predictions=current_predictions,
338                    quantile=quantile,
339                )
340
341                self.offset_multipliers_.append(multiplier)
342
343                # Update current predictions
344                if current_predictions is None:
345                    current_predictions = (
346                        base_predictions - multiplier * self.student_multiplier_
347                    )
348                else:
349                    offset = multiplier * self.student_multiplier_
350                    current_predictions = current_predictions + offset
351
352        return self

Fit the model to the data.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.
y: array-like, shape = [n_samples]
    Target values.
def predict(self, X, return_pi=False):
354    def predict(self, X, return_pi=False):
355        """Predict the target variable.
356
357        Parameters:
358
359            X: {array-like}, shape = [n_samples, n_features]
360                Training vectors, where n_samples is the number of samples and
361                n_features is the number of features.
362
363            return_pi: bool, default=True
364                Whether to return the prediction intervals.
365        """
366        if self.obj_ is None or self.offset_multipliers_ is None:
367            raise ValueError("Model not fitted yet.")
368
369        base_predictions = self.obj_.predict(X)
370        all_predictions = []
371
372        if self.scoring == "predictions":
373            # Generate first quantile
374            current_predictions = base_predictions - self.offset_multipliers_[
375                0
376            ] * np.abs(base_predictions)
377            all_predictions.append(current_predictions)
378
379            # Generate remaining quantiles
380            for multiplier in self.offset_multipliers_[1:]:
381                offset = multiplier * np.abs(base_predictions)
382                current_predictions = current_predictions + offset
383                all_predictions.append(current_predictions)
384
385        elif self.scoring in ("residuals", "conformal"):
386            # Generate first quantile
387            current_predictions = base_predictions - self.offset_multipliers_[
388                0
389            ] * np.std(self.scoring_residuals_) / np.sqrt(
390                len(self.scoring_residuals_)
391            )
392            all_predictions.append(current_predictions)
393
394            # Generate remaining quantiles
395            for multiplier in self.offset_multipliers_[1:]:
396                offset = (
397                    multiplier
398                    * np.std(self.scoring_residuals_)
399                    / np.sqrt(len(self.scoring_residuals_))
400                )
401                current_predictions = current_predictions + offset
402                all_predictions.append(current_predictions)
403
404        elif self.scoring in ("studentized", "conformal-studentized"):
405            # Generate first quantile
406            current_predictions = (
407                base_predictions
408                - self.offset_multipliers_[0] * self.student_multiplier_
409            )
410            all_predictions.append(current_predictions)
411
412            # Generate remaining quantiles
413            for multiplier in self.offset_multipliers_[1:]:
414                offset = multiplier * self.student_multiplier_
415                current_predictions = current_predictions + offset
416                all_predictions.append(current_predictions)
417
418        if return_pi == False:
419            return np.asarray(all_predictions[1])
420
421        DescribeResult = namedtuple(
422            "DecribeResult", ["mean", "lower", "upper", "median"]
423        )
424        DescribeResult.mean = base_predictions
425        DescribeResult.lower = np.asarray(all_predictions[0])
426        DescribeResult.median = np.asarray(all_predictions[1])
427        DescribeResult.upper = np.asarray(all_predictions[2])
428
429        return DescribeResult

Predict the target variable.

Parameters:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number of samples and
    n_features is the number of features.

return_pi: bool, default=False
    Whether to return the prediction intervals.
class QuantileClassifier(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 43class QuantileClassifier(BaseEstimator, ClassifierMixin):
 44    """
 45    Quantile Classifier.
 46
 47    Parameters:
 48
 49        obj: base model (classification model)
 50            The base classifier from which to build a
 51            quantile classifier.
 52
 53        level: int, default=95
 54            The level of the quantiles to compute.
 55
 56        scoring: str, default="predictions"
 57            The scoring to use for the optimization and constructing
 58            prediction intervals (predictions, residuals, conformal,
 59              studentized, conformal-studentized).
 60
 61    Attributes:
 62
 63        obj_ : base model (classification model)
 64            The base classifier from which to build a
 65            quantile classifier.
 66
 67        offset_multipliers_ : list
 68            The multipliers for the offset.
 69
 70        scoring_residuals_ : list
 71            The residuals for the scoring.
 72
 73        student_multiplier_ : float
 74            The multiplier for the student.
 75
 76
 77    """
 78
 79    def __init__(self, obj, level=95, scoring="predictions"):
 80        assert scoring in (
 81            "predictions",
 82            "residuals",
 83            "conformal",
 84            "studentized",
 85            "conformal-studentized",
 86        ), "scoring must be 'predictions' or 'residuals' or 'conformal' or 'studentized' or 'conformal-studentized'"
 87        self.obj = obj
 88        self.level = level
 89        self.scoring = scoring
 90        quantileregressor = QuantileRegressor(
 91            self.obj, self.level, self.scoring
 92        )
 93        quantileregressor.predict = partial(
 94            quantileregressor.predict, return_pi=False
 95        )
 96        self.obj_ = SimpleMultitaskClassifier(quantileregressor)
 97
 98    def fit(self, X, y, **kwargs):
 99        self.obj_.fit(X, y, **kwargs)
100
101    def predict(self, X, **kwargs):
102        return self.obj_.predict(X, **kwargs)
103
104    def predict_proba(self, X, **kwargs):
105        return self.obj_.predict_proba(X, **kwargs)

Quantile Classifier.

Parameters:

obj: base model (classification model)
    The base classifier from which to build a
    quantile classifier.

level: int, default=95
    The level of the quantiles to compute.

scoring: str, default="predictions"
    The scoring to use for the optimization and constructing
    prediction intervals (predictions, residuals, conformal,
      studentized, conformal-studentized).

Attributes:

obj_ : base model (classification model)
    The base classifier from which to build a
    quantile classifier.

offset_multipliers_ : list
    The multipliers for the offset.

scoring_residuals_ : list
    The residuals for the scoring.

student_multiplier_ : float
    The multiplier for the student.
def fit(self, X, y, **kwargs):
98    def fit(self, X, y, **kwargs):
99        self.obj_.fit(X, y, **kwargs)
def predict(self, X, **kwargs):
101    def predict(self, X, **kwargs):
102        return self.obj_.predict(X, **kwargs)
def predict_proba(self, X, **kwargs):
104    def predict_proba(self, X, **kwargs):
105        return self.obj_.predict_proba(X, **kwargs)
class RandomBagRegressor(nnetsauce.randombag.bag.RandomBag, sklearn.base.RegressorMixin):
 18class RandomBagRegressor(RandomBag, RegressorMixin):
 19    """Randomized 'Bagging' Regression model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of boosting iterations
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model''s
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    ```python
 93    import numpy as np
 94    import nnetsauce as ns
 95    from sklearn.datasets import fetch_california_housing
 96    from sklearn.tree import DecisionTreeRegressor
 97    from sklearn.model_selection import train_test_split
 98
 99    X, y = fetch_california_housing(return_X_y=True, as_frame=False)
100
101    # split data into training test and test set
102    X_train, X_test, y_train, y_test = train_test_split(X, y,
103                                                        test_size=0.2, random_state=13)
104
105    # Requires further tuning
106    obj = DecisionTreeRegressor(max_depth=3, random_state=123)
107    obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
108                                n_estimators=50,
109                                col_sample=0.9, row_sample=0.9,
110                                dropout=0, n_clusters=0, verbose=1)
111
112    obj2.fit(X_train, y_train)
113
114    print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
115
116    ```
117
118    """
119
120    # construct the object -----
121
122    def __init__(
123        self,
124        obj,
125        n_estimators=10,
126        n_hidden_features=1,
127        activation_name="relu",
128        a=0.01,
129        nodes_sim="sobol",
130        bias=True,
131        dropout=0,
132        direct_link=False,
133        n_clusters=2,
134        cluster_encode=True,
135        type_clust="kmeans",
136        type_scaling=("std", "std", "std"),
137        col_sample=1,
138        row_sample=1,
139        n_jobs=None,
140        seed=123,
141        verbose=1,
142        backend="cpu",
143    ):
144        super().__init__(
145            obj=obj,
146            n_estimators=n_estimators,
147            n_hidden_features=n_hidden_features,
148            activation_name=activation_name,
149            a=a,
150            nodes_sim=nodes_sim,
151            bias=bias,
152            dropout=dropout,
153            direct_link=direct_link,
154            n_clusters=n_clusters,
155            cluster_encode=cluster_encode,
156            type_clust=type_clust,
157            type_scaling=type_scaling,
158            col_sample=col_sample,
159            row_sample=row_sample,
160            seed=seed,
161            backend=backend,
162        )
163
164        self.type_fit = "regression"
165        self.verbose = verbose
166        self.n_jobs = n_jobs
167        self.voter_ = {}
168
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m)
230                for m in tqdm(range(self.n_estimators))
231            )
232        else:
233            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
234                delayed(fit_estimators)(m) for m in range(self.n_estimators)
235            )
236
237        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
238
239        self.n_estimators = len(self.voter_)
240
241        return self
242
243    def predict(self, X, weights=None, **kwargs):
244        """Predict for test data X.
245
246        Args:
247
248            X: {array-like}, shape = [n_samples, n_features]
249                Training vectors, where n_samples is the number
250                of samples and n_features is the number of features.
251
252            **kwargs: additional parameters to be passed to
253                    self.cook_test_set
254
255        Returns:
256
257            estimates for test data: {array-like}
258
259        """
260
261        def calculate_preds(voter, weights=None):
262            ensemble_preds = 0
263
264            n_iter = len(voter)
265
266            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
267
268            if weights is None:
269                for idx, elt in voter.items():
270                    ensemble_preds += elt.predict(X)
271
272                return ensemble_preds / n_iter
273
274            # if weights is not None:
275            for idx, elt in voter.items():
276                ensemble_preds += weights[idx] * elt.predict(X)
277
278            return ensemble_preds
279
280        # end calculate_preds ----
281
282        if weights is None:
283            return calculate_preds(self.voter_)
284
285        # if weights is not None:
286        self.weights = weights
287
288        return calculate_preds(self.voter_, weights)

Randomized 'Bagging' Regression model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of base learners in the bagging ensemble

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

X, y = fetch_california_housing(return_X_y=True, as_frame=False)

# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2, random_state=13)

# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
                            n_estimators=50,
                            col_sample=0.9, row_sample=0.9,
                            dropout=0, n_clusters=0, verbose=1)

obj2.fit(X_train, y_train)

print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
def fit(self, X, y, **kwargs):
169    def fit(self, X, y, **kwargs):
170        """Fit Random 'Bagging' model to training data (X, y).
171
172        Args:
173
174            X: {array-like}, shape = [n_samples, n_features]
175                Training vectors, where n_samples is the number
176                of samples and n_features is the number of features.
177
178            y: array-like, shape = [n_samples]
179                Target values.
180
181            **kwargs: additional parameters to be passed to
182                    self.cook_training_set or self.obj.fit
183
184        Returns:
185
186            self: object
187
188        """
189
190        base_learner = CustomRegressor(
191            self.obj,
192            n_hidden_features=self.n_hidden_features,
193            activation_name=self.activation_name,
194            a=self.a,
195            nodes_sim=self.nodes_sim,
196            bias=self.bias,
197            dropout=self.dropout,
198            direct_link=self.direct_link,
199            n_clusters=self.n_clusters,
200            type_clust=self.type_clust,
201            type_scaling=self.type_scaling,
202            col_sample=self.col_sample,
203            row_sample=self.row_sample,
204            seed=self.seed,
205        )
206
207        # 1 - Sequential training -----
208
209        if self.n_jobs is None:
210            self.voter_ = rbagloop_regression(
211                base_learner, X, y, self.n_estimators, self.verbose, self.seed
212            )
213
214            self.n_estimators = len(self.voter_)
215
216            return self
217
218        # 2 - Parallel training -----
219        # buggy
220        # if self.n_jobs is not None:
221        def fit_estimators(m):
222            base_learner__ = deepcopy(base_learner)
223            base_learner__.set_params(seed=self.seed + m * 1000)
224            base_learner__.fit(X, y, **kwargs)
225            return base_learner__
226
227        if self.verbose == 1:
228            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
229                delayed(fit_estimators)(m)
230                for m in tqdm(range(self.n_estimators))
231            )
232        else:
233            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
234                delayed(fit_estimators)(m) for m in range(self.n_estimators)
235            )
236
237        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}
238
239        self.n_estimators = len(self.voter_)
240
241        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
243    def predict(self, X, weights=None, **kwargs):
244        """Predict for test data X.
245
246        Args:
247
248            X: {array-like}, shape = [n_samples, n_features]
249                Training vectors, where n_samples is the number
250                of samples and n_features is the number of features.
251
252            **kwargs: additional parameters to be passed to
253                    self.cook_test_set
254
255        Returns:
256
257            estimates for test data: {array-like}
258
259        """
260
261        def calculate_preds(voter, weights=None):
262            ensemble_preds = 0
263
264            n_iter = len(voter)
265
266            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
267
268            if weights is None:
269                for idx, elt in voter.items():
270                    ensemble_preds += elt.predict(X)
271
272                return ensemble_preds / n_iter
273
274            # if weights is not None:
275            for idx, elt in voter.items():
276                ensemble_preds += weights[idx] * elt.predict(X)
277
278            return ensemble_preds
279
280        # end calculate_preds ----
281
282        if weights is None:
283            return calculate_preds(self.voter_)
284
285        # if weights is not None:
286        self.weights = weights
287
288        return calculate_preds(self.voter_, weights)

Predict for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

estimates for test data: {array-like}
class RandomBagClassifier(nnetsauce.randombag.bag.RandomBag, sklearn.base.ClassifierMixin):
 18class RandomBagClassifier(RandomBag, ClassifierMixin):
 19    """Randomized 'Bagging' Classification model
 20
 21    Parameters:
 22
 23        obj: object
 24            any object containing a method fit (obj.fit()) and a method predict
 25            (obj.predict())
 26
 27        n_estimators: int
 28            number of boosting iterations
 29
 30        n_hidden_features: int
 31            number of nodes in the hidden layer
 32
 33        activation_name: str
 34            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 35
 36        a: float
 37            hyperparameter for 'prelu' or 'elu' activation function
 38
 39        nodes_sim: str
 40            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 41            'uniform'
 42
 43        bias: boolean
 44            indicates if the hidden layer contains a bias term (True) or not
 45            (False)
 46
 47        dropout: float
 48            regularization parameter; (random) percentage of nodes dropped out
 49            of the training
 50
 51        direct_link: boolean
 52            indicates if the original predictors are included (True) in model's
 53            fitting or not (False)
 54
 55        n_clusters: int
 56            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 57                no clustering)
 58
 59        cluster_encode: bool
 60            defines how the variable containing clusters is treated (default is one-hot)
 61            if `False`, then labels are used, without one-hot encoding
 62
 63        type_clust: str
 64            type of clustering method: currently k-means ('kmeans') or Gaussian
 65            Mixture Model ('gmm')
 66
 67        type_scaling: a tuple of 3 strings
 68            scaling methods for inputs, hidden layer, and clustering respectively
 69            (and when relevant).
 70            Currently available: standardization ('std') or MinMax scaling ('minmax')
 71
 72        col_sample: float
 73            percentage of covariates randomly chosen for training
 74
 75        row_sample: float
 76            percentage of rows chosen for training, by stratified bootstrapping
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        voter_: dict
 87            dictionary containing all the fitted base-learners
 88
 89
 90    Examples:
 91
 92    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py)
 93
 94    ```python
 95    import nnetsauce as ns
 96    from sklearn.datasets import load_breast_cancer
 97    from sklearn.tree import DecisionTreeClassifier
 98    from sklearn.model_selection import train_test_split
 99    from sklearn import metrics
100    from time import time
101
102
103    breast_cancer = load_breast_cancer()
104    Z = breast_cancer.data
105    t = breast_cancer.target
106    np.random.seed(123)
107    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
108
109    # decision tree
110    clf = DecisionTreeClassifier(max_depth=2, random_state=123)
111    fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
112                                    direct_link=True,
113                                    n_estimators=100,
114                                    col_sample=0.9, row_sample=0.9,
115                                    dropout=0.3, n_clusters=0, verbose=1)
116
117    start = time()
118    fit_obj.fit(X_train, y_train)
119    print(f"Elapsed {time() - start}")
120
121    print(fit_obj.score(X_test, y_test))
122    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
123
124    start = time()
125    preds = fit_obj.predict(X_test)
126    print(f"Elapsed {time() - start}")
127    print(metrics.classification_report(preds, y_test))
128    ```
129
130    """
131
132    # construct the object -----
133    _estimator_type = "classifier"
134
135    def __init__(
136        self,
137        obj,
138        n_estimators=10,
139        n_hidden_features=1,
140        activation_name="relu",
141        a=0.01,
142        nodes_sim="sobol",
143        bias=True,
144        dropout=0,
145        direct_link=False,
146        n_clusters=2,
147        cluster_encode=True,
148        type_clust="kmeans",
149        type_scaling=("std", "std", "std"),
150        col_sample=1,
151        row_sample=1,
152        n_jobs=None,
153        seed=123,
154        verbose=1,
155        backend="cpu",
156    ):
157        super().__init__(
158            obj=obj,
159            n_estimators=n_estimators,
160            n_hidden_features=n_hidden_features,
161            activation_name=activation_name,
162            a=a,
163            nodes_sim=nodes_sim,
164            bias=bias,
165            dropout=dropout,
166            direct_link=direct_link,
167            n_clusters=n_clusters,
168            cluster_encode=cluster_encode,
169            type_clust=type_clust,
170            type_scaling=type_scaling,
171            col_sample=col_sample,
172            row_sample=row_sample,
173            seed=seed,
174            backend=backend,
175        )
176
177        self.type_fit = "classification"
178        self.verbose = verbose
179        self.n_jobs = n_jobs
180        self.voter_ = {}
181
182    def fit(self, X, y, **kwargs):
183        """Fit Random 'Bagging' model to training data (X, y).
184
185        Args:
186
187            X: {array-like}, shape = [n_samples, n_features]
188                Training vectors, where n_samples is the number
189                of samples and n_features is the number of features.
190
191            y: array-like, shape = [n_samples]
192                Target values.
193
194            **kwargs: additional parameters to be passed to
195                    self.cook_training_set or self.obj.fit
196
197        Returns:
198
199            self: object
200
201        """
202
203        assert mx.is_factor(y), "y must contain only integers"
204
205        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
206
207        # training
208        self.n_classes = len(np.unique(y))
209
210        base_learner = CustomClassifier(
211            self.obj,
212            n_hidden_features=self.n_hidden_features,
213            activation_name=self.activation_name,
214            a=self.a,
215            nodes_sim=self.nodes_sim,
216            bias=self.bias,
217            dropout=self.dropout,
218            direct_link=self.direct_link,
219            n_clusters=self.n_clusters,
220            type_clust=self.type_clust,
221            type_scaling=self.type_scaling,
222            col_sample=self.col_sample,
223            row_sample=self.row_sample,
224            seed=self.seed,
225            cv_calibration=None,
226        )
227
228        # 1 - Sequential training -----
229
230        if self.n_jobs is None:
231            self.voter_ = rbagloop_classification(
232                base_learner, X, y, self.n_estimators, self.verbose, self.seed
233            )
234
235            self.n_estimators = len(self.voter_)
236
237            return self
238
239        # 2 - Parallel training -----
240        # buggy
241        # if self.n_jobs is not None:
242        def fit_estimators(m):
243            base_learner__ = deepcopy(base_learner)
244            base_learner__.set_params(seed=self.seed + m * 1000)
245            base_learner__.fit(X, y, **kwargs)
246            return base_learner__
247
248        if self.verbose == 1:
249            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
250                delayed(fit_estimators)(m)
251                for m in tqdm(range(self.n_estimators))
252            )
253        else:
254            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
255                delayed(fit_estimators)(m) for m in range(self.n_estimators)
256            )
257
258        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
259
260        self.n_estimators = len(self.voter_)
261        self.classes_ = np.unique(y)
262        return self
263
264    def predict(self, X, weights=None, **kwargs):
265        """Predict test data X.
266
267        Args:
268
269            X: {array-like}, shape = [n_samples, n_features]
270                Training vectors, where n_samples is the number
271                of samples and n_features is the number of features.
272
273            **kwargs: additional parameters to be passed to
274                    self.cook_test_set
275
276        Returns:
277
278            model predictions: {array-like}
279
280        """
281        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
282
283    def predict_proba(self, X, weights=None, **kwargs):
284        """Predict probabilities for test data X.
285
286        Args:
287
288            X: {array-like}, shape = [n_samples, n_features]
289                Training vectors, where n_samples is the number
290                of samples and n_features is the number of features.
291
292            **kwargs: additional parameters to be passed to
293                    self.cook_test_set
294
295        Returns:
296
297            probability estimates for test data: {array-like}
298
299        """
300
301        def calculate_probas(voter, weights=None, verbose=None):
302            ensemble_proba = 0
303
304            n_iter = len(voter)
305
306            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
307
308            if weights is None:
309                for idx, elt in voter.items():
310                    try:
311                        ensemble_proba += elt.predict_proba(X)
312
313                        # if verbose == 1:
314                        #    pbar.update(idx)
315
316                    except:
317                        continue
318
319                # if verbose == 1:
320                #    pbar.update(n_iter)
321
322                return ensemble_proba / n_iter
323
324            # if weights is not None:
325            for idx, elt in voter.items():
326                ensemble_proba += weights[idx] * elt.predict_proba(X)
327
328                # if verbose == 1:
329                #    pbar.update(idx)
330
331            # if verbose == 1:
332            #    pbar.update(n_iter)
333
334            return ensemble_proba
335
336        # end calculate_probas ----
337
338        if self.n_jobs is None:
339            # if self.verbose == 1:
340            #    pbar = Progbar(self.n_estimators)
341
342            if weights is None:
343                return calculate_probas(self.voter_, verbose=self.verbose)
344
345            # if weights is not None:
346            self.weights = weights
347
348            return calculate_probas(self.voter_, weights, verbose=self.verbose)
349
350        # if self.n_jobs is not None:
351        def predict_estimator(m):
352            try:
353                return self.voter_[m].predict_proba(X)
354            except:
355                pass
356
357        if self.verbose == 1:
358            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
359                delayed(predict_estimator)(m)
360                for m in tqdm(range(self.n_estimators))
361            )
362
363        else:
364            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
365                delayed(predict_estimator)(m) for m in range(self.n_estimators)
366            )
367
368        ensemble_proba = 0
369
370        if weights is None:
371            for i in range(self.n_estimators):
372                ensemble_proba += preds[i]
373
374            return ensemble_proba / self.n_estimators
375
376        for i in range(self.n_estimators):
377            ensemble_proba += weights[i] * preds[i]
378
379        return ensemble_proba
380
381    @property
382    def _estimator_type(self):
383        return "classifier"

Randomized 'Bagging' Classification model

Parameters:

obj: object
    any object containing a method fit (obj.fit()) and a method predict
    (obj.predict())

n_estimators: int
    number of base learners in the bagging ensemble

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

col_sample: float
    percentage of covariates randomly chosen for training

row_sample: float
    percentage of rows chosen for training, by stratified bootstrapping

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

voter_: dict
    dictionary containing all the fitted base-learners

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py

import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time


breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                direct_link=True,
                                n_estimators=100,
                                col_sample=0.9, row_sample=0.9,
                                dropout=0.3, n_clusters=0, verbose=1)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
182    def fit(self, X, y, **kwargs):
183        """Fit Random 'Bagging' model to training data (X, y).
184
185        Args:
186
187            X: {array-like}, shape = [n_samples, n_features]
188                Training vectors, where n_samples is the number
189                of samples and n_features is the number of features.
190
191            y: array-like, shape = [n_samples]
192                Target values.
193
194            **kwargs: additional parameters to be passed to
195                    self.cook_training_set or self.obj.fit
196
197        Returns:
198
199            self: object
200
201        """
202
203        assert mx.is_factor(y), "y must contain only integers"
204
205        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn
206
207        # training
208        self.n_classes = len(np.unique(y))
209
210        base_learner = CustomClassifier(
211            self.obj,
212            n_hidden_features=self.n_hidden_features,
213            activation_name=self.activation_name,
214            a=self.a,
215            nodes_sim=self.nodes_sim,
216            bias=self.bias,
217            dropout=self.dropout,
218            direct_link=self.direct_link,
219            n_clusters=self.n_clusters,
220            type_clust=self.type_clust,
221            type_scaling=self.type_scaling,
222            col_sample=self.col_sample,
223            row_sample=self.row_sample,
224            seed=self.seed,
225            cv_calibration=None,
226        )
227
228        # 1 - Sequential training -----
229
230        if self.n_jobs is None:
231            self.voter_ = rbagloop_classification(
232                base_learner, X, y, self.n_estimators, self.verbose, self.seed
233            )
234
235            self.n_estimators = len(self.voter_)
236
237            return self
238
239        # 2 - Parallel training -----
240        # buggy
241        # if self.n_jobs is not None:
242        def fit_estimators(m):
243            base_learner__ = deepcopy(base_learner)
244            base_learner__.set_params(seed=self.seed + m * 1000)
245            base_learner__.fit(X, y, **kwargs)
246            return base_learner__
247
248        if self.verbose == 1:
249            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
250                delayed(fit_estimators)(m)
251                for m in tqdm(range(self.n_estimators))
252            )
253        else:
254            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
255                delayed(fit_estimators)(m) for m in range(self.n_estimators)
256            )
257
258        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}
259
260        self.n_estimators = len(self.voter_)
261        self.classes_ = np.unique(y)
262        return self

Fit Random 'Bagging' model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, weights=None, **kwargs):
264    def predict(self, X, weights=None, **kwargs):
265        """Predict test data X.
266
267        Args:
268
269            X: {array-like}, shape = [n_samples, n_features]
270                Training vectors, where n_samples is the number
271                of samples and n_features is the number of features.
272
273            **kwargs: additional parameters to be passed to
274                    self.cook_test_set
275
276        Returns:
277
278            model predictions: {array-like}
279
280        """
281        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, weights=None, **kwargs):
283    def predict_proba(self, X, weights=None, **kwargs):
284        """Predict probabilities for test data X.
285
286        Args:
287
288            X: {array-like}, shape = [n_samples, n_features]
289                Training vectors, where n_samples is the number
290                of samples and n_features is the number of features.
291
292            **kwargs: additional parameters to be passed to
293                    self.cook_test_set
294
295        Returns:
296
297            probability estimates for test data: {array-like}
298
299        """
300
301        def calculate_probas(voter, weights=None, verbose=None):
302            ensemble_proba = 0
303
304            n_iter = len(voter)
305
306            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"
307
308            if weights is None:
309                for idx, elt in voter.items():
310                    try:
311                        ensemble_proba += elt.predict_proba(X)
312
313                        # if verbose == 1:
314                        #    pbar.update(idx)
315
316                    except:
317                        continue
318
319                # if verbose == 1:
320                #    pbar.update(n_iter)
321
322                return ensemble_proba / n_iter
323
324            # if weights is not None:
325            for idx, elt in voter.items():
326                ensemble_proba += weights[idx] * elt.predict_proba(X)
327
328                # if verbose == 1:
329                #    pbar.update(idx)
330
331            # if verbose == 1:
332            #    pbar.update(n_iter)
333
334            return ensemble_proba
335
336        # end calculate_probas ----
337
338        if self.n_jobs is None:
339            # if self.verbose == 1:
340            #    pbar = Progbar(self.n_estimators)
341
342            if weights is None:
343                return calculate_probas(self.voter_, verbose=self.verbose)
344
345            # if weights is not None:
346            self.weights = weights
347
348            return calculate_probas(self.voter_, weights, verbose=self.verbose)
349
350        # if self.n_jobs is not None:
351        def predict_estimator(m):
352            try:
353                return self.voter_[m].predict_proba(X)
354            except:
355                pass
356
357        if self.verbose == 1:
358            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
359                delayed(predict_estimator)(m)
360                for m in tqdm(range(self.n_estimators))
361            )
362
363        else:
364            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
365                delayed(predict_estimator)(m) for m in range(self.n_estimators)
366            )
367
368        ensemble_proba = 0
369
370        if weights is None:
371            for i in range(self.n_estimators):
372                ensemble_proba += preds[i]
373
374            return ensemble_proba / self.n_estimators
375
376        for i in range(self.n_estimators):
377            ensemble_proba += weights[i] * preds[i]
378
379        return ensemble_proba

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
class RandomFourierEstimator(sklearn.base.BaseEstimator):
 23class RandomFourierEstimator(BaseEstimator):
 24    def __init__(
 25        self, estimator, n_components=100, gamma=1.0, random_state=None
 26    ):
 27        """
 28        Random Fourier Features transformation with a given estimator.
 29
 30        Parameters:
 31        - estimator: A scikit-learn estimator (classifier, regressor, etc.).
 32        - n_components: Number of random Fourier features.
 33        - gamma: Hyperparameter for RBF kernel approximation.
 34        - random_state: Random state for reproducibility.
 35        """
 36        self.estimator = estimator
 37        self.n_components = n_components
 38        self.gamma = gamma
 39        self.random_state = random_state
 40
 41        # Dynamically set the estimator type and appropriate mixin
 42        estimator_type = _get_estimator_type(estimator)
 43        if estimator_type == "classifier":
 44            self._estimator_type = "classifier"
 45            # Add ClassifierMixin to the class hierarchy
 46            if not isinstance(self, ClassifierMixin):
 47                self.__class__ = type(
 48                    self.__class__.__name__,
 49                    (self.__class__, ClassifierMixin),
 50                    dict(self.__class__.__dict__),
 51                )
 52        elif estimator_type == "regressor":
 53            self._estimator_type = "regressor"
 54            # Add RegressorMixin to the class hierarchy
 55            if not isinstance(self, RegressorMixin):
 56                self.__class__ = type(
 57                    self.__class__.__name__,
 58                    (self.__class__, RegressorMixin),
 59                    dict(self.__class__.__dict__),
 60                )
 61
 62    def fit(self, X, y=None):
 63        """
 64        Fit the Random Fourier feature transformer and the estimator.
 65        """
 66        X = check_array(X)
 67
 68        # Initialize and fit the Random Fourier Feature transformer
 69        self.rff_ = RBFSampler(
 70            n_components=self.n_components,
 71            gamma=self.gamma,
 72            random_state=self.random_state,
 73        )
 74        X_transformed = self.rff_.fit_transform(X)
 75
 76        # Fit the underlying estimator on the transformed data
 77        self.estimator.fit(X_transformed, y)
 78
 79        return self
 80
 81    def partial_fit(self, X, y, classes=None):
 82        """
 83        Incrementally fit the Random Fourier feature transformer and the estimator.
 84        """
 85        X = check_array(X)
 86
 87        # Check if RFF transformer is already fitted
 88        if not hasattr(self, "rff_"):
 89            # First call - fit the transformer
 90            self.rff_ = RBFSampler(
 91                n_components=self.n_components,
 92                gamma=self.gamma,
 93                random_state=self.random_state,
 94            )
 95            X_transformed = self.rff_.fit_transform(X)
 96        else:
 97            # Subsequent calls - only transform
 98            X_transformed = self.rff_.transform(X)
 99
100        # If estimator supports partial_fit, we use it, otherwise raise an error
101        if hasattr(self.estimator, "partial_fit"):
102            self.estimator.partial_fit(X_transformed, y, classes=classes)
103        else:
104            raise ValueError(
105                f"The estimator {type(self.estimator).__name__} does not support partial_fit method."
106            )
107
108        return self
109
110    def predict(self, X):
111        """
112        Predict using the Random Fourier transformed data.
113        """
114        check_is_fitted(self, ["rff_"])
115        X = check_array(X)
116
117        # Transform the input data
118        X_transformed = self.rff_.transform(X)
119
120        # Predict using the underlying estimator
121        return self.estimator.predict(X_transformed)
122
123    def predict_proba(self, X):
124        """
125        Predict class probabilities (only for classifiers).
126        """
127        if (
128            not hasattr(self, "_estimator_type")
129            or self._estimator_type != "classifier"
130        ):
131            raise AttributeError(
132                "predict_proba is not available for this estimator type."
133            )
134
135        check_is_fitted(self, ["rff_"])
136        X = check_array(X)
137
138        if not hasattr(self.estimator, "predict_proba"):
139            raise ValueError(
140                f"The estimator {type(self.estimator).__name__} does not support predict_proba."
141            )
142
143        # Transform the input data
144        X_transformed = self.rff_.transform(X)
145
146        # Predict probabilities using the underlying estimator
147        return self.estimator.predict_proba(X_transformed)
148
149    def predict_log_proba(self, X):
150        """
151        Predict class log probabilities (only for classifiers).
152        """
153        if (
154            not hasattr(self, "_estimator_type")
155            or self._estimator_type != "classifier"
156        ):
157            raise AttributeError(
158                "predict_log_proba is not available for this estimator type."
159            )
160
161        check_is_fitted(self, ["rff_"])
162        X = check_array(X)
163
164        if not hasattr(self.estimator, "predict_log_proba"):
165            raise ValueError(
166                f"The estimator {type(self.estimator).__name__} does not support predict_log_proba."
167            )
168
169        # Transform the input data
170        X_transformed = self.rff_.transform(X)
171
172        return self.estimator.predict_log_proba(X_transformed)
173
174    def decision_function(self, X):
175        """
176        Decision function (only for classifiers).
177        """
178        if (
179            not hasattr(self, "_estimator_type")
180            or self._estimator_type != "classifier"
181        ):
182            raise AttributeError(
183                "decision_function is not available for this estimator type."
184            )
185
186        check_is_fitted(self, ["rff_"])
187        X = check_array(X)
188
189        if not hasattr(self.estimator, "decision_function"):
190            raise ValueError(
191                f"The estimator {type(self.estimator).__name__} does not support decision_function."
192            )
193
194        # Transform the input data
195        X_transformed = self.rff_.transform(X)
196
197        return self.estimator.decision_function(X_transformed)
198
199    def score(self, X, y):
200        """
201        Evaluate the model performance.
202        """
203        check_is_fitted(self, ["rff_"])
204        X = check_array(X)
205
206        # Transform the input data
207        X_transformed = self.rff_.transform(X)
208
209        # Evaluate using the underlying estimator's score method
210        return self.estimator.score(X_transformed, y)
211
212    @property
213    def classes_(self):
214        """Classes labels (only for classifiers)."""
215        if (
216            hasattr(self, "_estimator_type")
217            and self._estimator_type == "classifier"
218        ):
219            return getattr(self.estimator, "classes_", None)
220        else:
221            raise AttributeError(
222                "classes_ is not available for this estimator type."
223            )
224
225    def get_params(self, deep=True):
226        """
227        Get parameters for this estimator.
228        """
229        params = {}
230
231        # Get estimator parameters with proper prefixing
232        if deep:
233            estimator_params = self.estimator.get_params(deep=True)
234            for key, value in estimator_params.items():
235                params[f"estimator__{key}"] = value
236
237        # Add our own parameters
238        params.update(
239            {
240                "estimator": self.estimator,
241                "n_components": self.n_components,
242                "gamma": self.gamma,
243                "random_state": self.random_state,
244            }
245        )
246
247        return params
248
249    def set_params(self, **params):
250        """
251        Set the parameters of this estimator.
252        """
253        # Separate our parameters from estimator parameters
254        our_params = {}
255        estimator_params = {}
256
257        for param, value in params.items():
258            if param.startswith("estimator__"):
259                # Remove the 'estimator__' prefix
260                estimator_params[param[11:]] = value
261            elif param in [
262                "estimator",
263                "n_components",
264                "gamma",
265                "random_state",
266            ]:
267                our_params[param] = value
268            else:
269                # Assume it's an estimator parameter without prefix
270                estimator_params[param] = value
271
272        # Set our parameters
273        for param, value in our_params.items():
274            setattr(self, param, value)
275
276        # If estimator changed, update the estimator type
277        if "estimator" in our_params:
278            self.__init__(
279                self.estimator, self.n_components, self.gamma, self.random_state
280            )
281
282        # Set estimator parameters
283        if estimator_params:
284            self.estimator.set_params(**estimator_params)
285
286        # If RFF parameters changed and model is fitted, we need to refit
287        if hasattr(self, "rff_") and (
288            "n_components" in our_params
289            or "gamma" in our_params
290            or "random_state" in our_params
291        ):
292            # Remove the fitted transformer so it gets recreated on next fit
293            delattr(self, "rff_")
294
295        return self

Base class for all estimators in scikit-learn.

Inheriting from this class provides default implementations of:

  • setting and getting parameters used by GridSearchCV and friends;
  • textual and HTML representation displayed in terminals and IDEs;
  • estimator serialization;
  • parameters validation;
  • data validation;
  • feature names validation.

Read more in the :ref:`User Guide <rolling_your_own_estimator>`.

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

Examples

>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
...     def __init__(self, *, param=1):
...         self.param = param
...     def fit(self, X, y=None):
...         self.is_fitted_ = True
...         return self
...     def predict(self, X):
...         return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
def fit(self, X, y=None):
62    def fit(self, X, y=None):
63        """
64        Fit the Random Fourier feature transformer and the estimator.
65        """
66        X = check_array(X)
67
68        # Initialize and fit the Random Fourier Feature transformer
69        self.rff_ = RBFSampler(
70            n_components=self.n_components,
71            gamma=self.gamma,
72            random_state=self.random_state,
73        )
74        X_transformed = self.rff_.fit_transform(X)
75
76        # Fit the underlying estimator on the transformed data
77        self.estimator.fit(X_transformed, y)
78
79        return self

Fit the Random Fourier feature transformer and the estimator.

def predict(self, X):
110    def predict(self, X):
111        """
112        Predict using the Random Fourier transformed data.
113        """
114        check_is_fitted(self, ["rff_"])
115        X = check_array(X)
116
117        # Transform the input data
118        X_transformed = self.rff_.transform(X)
119
120        # Predict using the underlying estimator
121        return self.estimator.predict(X_transformed)

Predict using the Random Fourier transformed data.

def predict_proba(self, X):
123    def predict_proba(self, X):
124        """
125        Predict class probabilities (only for classifiers).
126        """
127        if (
128            not hasattr(self, "_estimator_type")
129            or self._estimator_type != "classifier"
130        ):
131            raise AttributeError(
132                "predict_proba is not available for this estimator type."
133            )
134
135        check_is_fitted(self, ["rff_"])
136        X = check_array(X)
137
138        if not hasattr(self.estimator, "predict_proba"):
139            raise ValueError(
140                f"The estimator {type(self.estimator).__name__} does not support predict_proba."
141            )
142
143        # Transform the input data
144        X_transformed = self.rff_.transform(X)
145
146        # Predict probabilities using the underlying estimator
147        return self.estimator.predict_proba(X_transformed)

Predict class probabilities (only for classifiers).

def score(self, X, y):
199    def score(self, X, y):
200        """
201        Evaluate the model performance.
202        """
203        check_is_fitted(self, ["rff_"])
204        X = check_array(X)
205
206        # Transform the input data
207        X_transformed = self.rff_.transform(X)
208
209        # Evaluate using the underlying estimator's score method
210        return self.estimator.score(X_transformed, y)

Evaluate the model performance.

class RandomFourierFeaturesRidge(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 17class RandomFourierFeaturesRidge(BaseEstimator, RegressorMixin):
 18    """
 19    Random Fourier Features with Bayesian Ridge Regression.
 20
 21    Implements both standard (MLE) and Bayesian versions with uncertainty quantification.
 22    Uses data augmentation for L2 regularization via jnp.lstsq.
 23    """
 24
 25    def __init__(
 26        self,
 27        n_features: int = 100,
 28        gamma: float = 1.0,
 29        alpha: float = 1e-6,
 30        include_bias: bool = True,
 31        random_seed: int = 42,
 32    ):
 33        """
 34        Parameters:
 35        -----------
 36        n_features : int
 37            Number of random Fourier features (D)
 38        gamma : float
 39            RBF kernel parameter: k(x,y) = exp(-gamma * ||x-y||²)
 40        alpha : float
 41            Prior precision (inverse variance) for Bayesian version
 42            Equivalent to regularization strength: lambda = alpha / beta
 43        include_bias : bool
 44            Whether to include a bias term
 45        random_seed : int
 46            Random seed for reproducibility
 47        """
 48
 49        if not JAX_AVAILABLE:
 50            raise RuntimeError(
 51                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
 52            )
 53
 54        self.n_features = n_features
 55        self.gamma = gamma
 56        self.alpha = alpha
 57        self.include_bias = include_bias
 58        self.key = random.PRNGKey(random_seed)
 59        self.is_fitted = False
 60
 61        # Bayesian parameters
 62        self.beta = None  # Noise precision (will be estimated from data)
 63        self.w_mean = None  # Posterior mean of weights
 64        self.w_cov = None  # Posterior covariance of weights
 65        self.S_N = None  # Posterior precision matrix
 66
 67    def _compute_random_features(self, X, W, b):
 68        """Compute random Fourier features: sqrt(2/D) * cos(XW + b)"""
 69        projection = jnp.dot(X, W) + b  # Shape: (n_samples, n_features)
 70        features = jnp.sqrt(2.0 / self.n_features) * jnp.cos(projection)
 71
 72        if self.include_bias:
 73            features = jnp.concatenate(
 74                [jnp.ones((X.shape[0], 1)), features], axis=1
 75            )
 76
 77        return features
 78
 79    def _init_random_weights(self, input_dim):
 80        """Initialize random weights and biases for RFF"""
 81        # Sample from Gaussian distribution for RBF kernel
 82        # Variance = 2 * gamma for RBF kernel
 83        self.key, subkey = random.split(self.key)
 84        W = random.normal(
 85            subkey, shape=(input_dim, self.n_features)
 86        ) * jnp.sqrt(2.0 * self.gamma)
 87
 88        self.key, subkey = random.split(self.key)
 89        b = random.uniform(
 90            subkey, shape=(1, self.n_features), minval=0, maxval=2 * jnp.pi
 91        )
 92
 93        return W, b
 94
 95    def fit(
 96        self,
 97        X,
 98        y,
 99        method="bayesian",
100        noise_variance=None,
101    ):
102        """
103        Fit the model using either standard or Bayesian ridge regression.
104
105        Parameters:
106        -----------
107        X : array-like, shape (n_samples, n_features)
108            Training data
109        y : array-like, shape (n_samples,) or (n_samples, n_targets)
110            Target values
111        method : str, either "standard" or "bayesian"
112            "standard": Maximum likelihood estimation with L2 regularization
113            "bayesian": Full Bayesian inference with uncertainty quantification
114        noise_variance : float, optional
115            If provided, fixes the noise variance instead of estimating it
116        """
117        # Convert to JAX arrays if needed
118        X = jnp.asarray(X)
119        y = jnp.asarray(y)
120
121        if len(y.shape) == 1:
122            y = y.reshape(-1, 1)
123
124        n_samples, input_dim = X.shape
125
126        # Initialize random Fourier weights
127        self.W, self.b = self._init_random_weights(input_dim)
128
129        # Compute random Fourier features
130        Phi = self._compute_random_features(X, self.W, self.b)
131        n_basis = Phi.shape[1]  # D + 1 if bias included
132
133        # Store feature matrix and target values for Bayesian updates/likelihood computation
134        self.Phi_train = Phi
135        self.y_train = y  # Store y_train
136
137        if method == "standard":
138            # Standard ridge regression using data augmentation for regularization
139            self._fit_standard(Phi, y)
140        elif method == "bayesian":
141            # Bayesian ridge regression
142            self._fit_bayesian(Phi, y, noise_variance)
143        else:
144            raise ValueError("method must be 'standard' or 'bayesian'")
145
146        self.is_fitted = True
147        self.method = method
148        self.input_dim = input_dim
149
150        return self
151
152    def _fit_standard(self, Phi, y) -> None:
153        """Standard ridge regression using lstsq with data augmentation"""
154        n_samples, n_basis = Phi.shape
155
156        # Create augmented data for L2 regularization
157        # This is equivalent to adding sqrt(alpha) * I to the design matrix
158        sqrt_alpha = jnp.sqrt(self.alpha)
159        Phi_aug = jnp.vstack([Phi, sqrt_alpha * jnp.eye(n_basis)])
160        y_aug = jnp.vstack([y, jnp.zeros((n_basis, y.shape[1]))])
161
162        # Solve using least squares
163        # Note: jnp.linalg.lstsq is more stable than explicit normal equations
164        weights, residuals, rank, s = jnp.linalg.lstsq(
165            Phi_aug, y_aug, rcond=None
166        )
167
168        self.w_mean = weights
169        self.weights = weights  # For compatibility
170
171        # Estimate noise variance from residuals
172        residuals = y - Phi @ weights
173        self.beta = 1.0 / jnp.maximum(jnp.var(residuals), 1e-8)
174
175    def _fit_bayesian(
176        self,
177        Phi,
178        y,
179        noise_variance=None,
180    ) -> None:
181        """Bayesian ridge regression with evidence approximation"""
182        n_samples, n_basis = Phi.shape
183
184        # Initialize precision parameters
185        if noise_variance is not None:
186            self.beta = 1.0 / noise_variance
187        else:
188            # Initial estimate of beta from data
189            self.beta = 1.0 / jnp.maximum(jnp.var(y), 1e-8)
190
191        # Posterior precision matrix: S_N⁻¹ = alpha * I + beta * ΦᵀΦ
192        I = jnp.eye(n_basis)
193        PhiT_Phi = Phi.T @ Phi
194
195        # Initialize with prior
196        S_N_inv = self.alpha * I
197
198        # Evidence approximation to optimize alpha, beta
199        for _ in range(10):  # Iterate to converge on alpha, beta
200            # Update posterior mean and covariance
201            S_N = jnp.linalg.inv(S_N_inv + self.beta * PhiT_Phi)
202            self.w_mean = self.beta * S_N @ Phi.T @ y
203
204            # Update gamma (effective number of parameters)
205            eigenvalues = jnp.linalg.eigvalsh(PhiT_Phi)
206            gamma_val = jnp.sum(eigenvalues / (self.alpha + eigenvalues))
207
208            # Update alpha and beta (MacKay's fixed point updates)
209            if self.alpha > 0:
210                self.alpha = gamma_val / jnp.sum(self.w_mean**2)
211
212            if noise_variance is None:
213                residuals = y - Phi @ self.w_mean
214                self.beta = (n_samples - gamma_val) / jnp.sum(residuals**2)
215
216            # Update precision matrix
217            S_N_inv = self.alpha * I
218
219        # Store final covariance
220        self.S_N = jnp.linalg.inv(self.alpha * I + self.beta * PhiT_Phi)
221        self.w_cov = self.S_N
222
223        # Also store for compatibility
224        self.weights = self.w_mean
225
226    def transform(self, X):
227        """Transform input data to random Fourier feature space"""
228        if not self.is_fitted:
229            raise ValueError("Model must be fitted before transforming")
230
231        X = jnp.asarray(X)
232        return self._compute_random_features(X, self.W, self.b)
233
234    def predict(
235        self,
236        X,
237        return_std=False,
238        return_cov=False,
239    ):
240        """
241        Make predictions, optionally with uncertainty quantification.
242
243        Parameters:
244        -----------
245        X : array-like, shape (n_samples, n_features)
246            Input data
247        return_std : bool
248            If True, return standard deviation of predictive distribution
249        return_cov : bool
250            If True, return full covariance matrix of predictive distribution
251
252        Returns:
253        --------
254        y_pred : jnp.ndarray
255            Predictive mean
256        y_std or y_cov : jnp.ndarray, optional
257            Predictive standard deviation or covariance
258        """
259        if not self.is_fitted:
260            raise ValueError("Model must be fitted before prediction")
261
262        X = jnp.asarray(X)
263        Phi = self.transform(X)
264
265        # Predictive mean
266        y_pred = Phi @ self.w_mean
267
268        if not return_std and not return_cov:
269            return y_pred
270
271        if self.method != "bayesian":
272            raise ValueError(
273                "Uncertainty quantification only available for Bayesian method"
274            )
275
276        # Predictive variance
277        if return_cov:
278            # Full predictive covariance
279            # Σ_pred = (1/β) * I + Φ @ S_N @ Φᵀ
280            pred_cov = (1.0 / self.beta) * jnp.eye(
281                Phi.shape[0]
282            ) + Phi @ self.S_N @ Phi.T
283            return y_pred, pred_cov
284        else:
285            # Diagonal of predictive covariance (standard deviations)
286            # σ²_pred = (1/β) + diag(Φ @ S_N @ Φᵀ)
287            var_diag = (1.0 / self.beta) + jnp.sum(
288                (Phi @ self.S_N) * Phi, axis=1
289            )
290            y_std = jnp.sqrt(jnp.maximum(var_diag, 0.0)).reshape(-1, 1)
291            return y_pred, y_std
292
293    def sample_posterior(
294        self,
295        X,
296        n_samples=1,
297        key=None,
298    ):
299        """
300        Sample from the posterior predictive distribution.
301
302        Parameters:
303        -----------
304        X : array-like
305            Input data
306        n_samples : int
307            Number of samples to draw
308        key : PRNGKey, optional
309            Random key for sampling
310
311        Returns:
312        --------
313        samples : jnp.ndarray, shape (n_samples, n_test_samples)
314            Samples from posterior predictive distribution
315        """
316        if self.method != "bayesian":
317            raise ValueError("Sampling only available for Bayesian method")
318
319        if key is None:
320            key = self.key
321
322        X = jnp.asarray(X)
323        Phi = self.transform(X)
324        n_test = Phi.shape[0]
325
326        # Sample weights from posterior
327        key, subkey = random.split(key)
328        w_samples = random.multivariate_normal(
329            subkey, self.w_mean.flatten(), self.S_N, shape=(n_samples,)
330        )
331
332        # Generate predictions for each weight sample
333        samples = []
334        for i in range(n_samples):
335            w_sample = w_samples[i].reshape(-1, 1)
336            # Add noise variance
337            key, subkey1, subkey2 = random.split(key, 3)
338            pred_mean = Phi @ w_sample
339            noise = random.normal(subkey2, shape=pred_mean.shape) / jnp.sqrt(
340                self.beta
341            )
342            samples.append(pred_mean + noise)
343
344        return jnp.stack(samples, axis=0)
345
346    def log_marginal_likelihood(self) -> float:
347        """
348        Compute log marginal likelihood (evidence) for Bayesian model.
349
350        Returns:
351        --------
352        log_evidence : float
353            Log marginal likelihood p(y|X,α,β)
354        """
355        if self.method != "bayesian":
356            raise ValueError(
357                "Log marginal likelihood only available for Bayesian method"
358            )
359
360        n_samples = self.Phi_train.shape[0]
361        n_basis = self.Phi_train.shape[1]
362
363        # Log determinant term
364        I = jnp.eye(n_basis)
365        A = self.alpha * I + self.beta * self.Phi_train.T @ self.Phi_train
366        sign, logdet_A = jnp.linalg.slogdet(A)
367        logdet_term = 0.5 * (n_basis * jnp.log(self.alpha) - logdet_A)
368
369        # Data fit term
370        residuals = self.y_train - self.Phi_train @ self.w_mean
371        data_fit_term = -0.5 * self.beta * jnp.sum(residuals**2)
372
373        # Constant term
374        const_term = 0.5 * n_samples * jnp.log(self.beta / (2 * jnp.pi))
375
376        return float(logdet_term + data_fit_term + const_term)
377
378    def get_params(self) -> Dict:
379        """Get model parameters"""
380        return {
381            "n_features": self.n_features,
382            "gamma": self.gamma,
383            "alpha": self.alpha,
384            "beta": self.beta if self.beta is not None else None,
385            "method": self.method if hasattr(self, "method") else None,
386            "input_dim": self.input_dim if hasattr(self, "input_dim") else None,
387        }
388
389    def set_params(self, **params) -> "RandomFourierFeaturesRidge":
390        """Set model parameters"""
391        for key, value in params.items():
392            if hasattr(self, key):
393                setattr(self, key, value)
394        return self

Random Fourier Features with Bayesian Ridge Regression.

Implements both standard (MLE) and Bayesian versions with uncertainty quantification. Uses data augmentation for L2 regularization via jnp.linalg.lstsq.

def fit(self, X, y, method='bayesian', noise_variance=None):
 95    def fit(
 96        self,
 97        X,
 98        y,
 99        method="bayesian",
100        noise_variance=None,
101    ):
102        """
103        Fit the model using either standard or Bayesian ridge regression.
104
105        Parameters:
106        -----------
107        X : array-like, shape (n_samples, n_features)
108            Training data
109        y : array-like, shape (n_samples,) or (n_samples, n_targets)
110            Target values
111        method : str, either "standard" or "bayesian"
112            "standard": Maximum likelihood estimation with L2 regularization
113            "bayesian": Full Bayesian inference with uncertainty quantification
114        noise_variance : float, optional
115            If provided, fixes the noise variance instead of estimating it
116        """
117        # Convert to JAX arrays if needed
118        X = jnp.asarray(X)
119        y = jnp.asarray(y)
120
121        if len(y.shape) == 1:
122            y = y.reshape(-1, 1)
123
124        n_samples, input_dim = X.shape
125
126        # Initialize random Fourier weights
127        self.W, self.b = self._init_random_weights(input_dim)
128
129        # Compute random Fourier features
130        Phi = self._compute_random_features(X, self.W, self.b)
131        n_basis = Phi.shape[1]  # D + 1 if bias included
132
133        # Store feature matrix and target values for Bayesian updates/likelihood computation
134        self.Phi_train = Phi
135        self.y_train = y  # Store y_train
136
137        if method == "standard":
138            # Standard ridge regression using data augmentation for regularization
139            self._fit_standard(Phi, y)
140        elif method == "bayesian":
141            # Bayesian ridge regression
142            self._fit_bayesian(Phi, y, noise_variance)
143        else:
144            raise ValueError("method must be 'standard' or 'bayesian'")
145
146        self.is_fitted = True
147        self.method = method
148        self.input_dim = input_dim
149
150        return self

Fit the model using either standard or Bayesian ridge regression.

Parameters:

X : array-like, shape (n_samples, n_features) Training data y : array-like, shape (n_samples,) or (n_samples, n_targets) Target values method : str, either "standard" or "bayesian" "standard": Maximum likelihood estimation with L2 regularization "bayesian": Full Bayesian inference with uncertainty quantification noise_variance : float, optional If provided, fixes the noise variance instead of estimating it

def predict(self, X, return_std=False, return_cov=False):
234    def predict(
235        self,
236        X,
237        return_std=False,
238        return_cov=False,
239    ):
240        """
241        Make predictions, optionally with uncertainty quantification.
242
243        Parameters:
244        -----------
245        X : array-like, shape (n_samples, n_features)
246            Input data
247        return_std : bool
248            If True, return standard deviation of predictive distribution
249        return_cov : bool
250            If True, return full covariance matrix of predictive distribution
251
252        Returns:
253        --------
254        y_pred : jnp.ndarray
255            Predictive mean
256        y_std or y_cov : jnp.ndarray, optional
257            Predictive standard deviation or covariance
258        """
259        if not self.is_fitted:
260            raise ValueError("Model must be fitted before prediction")
261
262        X = jnp.asarray(X)
263        Phi = self.transform(X)
264
265        # Predictive mean
266        y_pred = Phi @ self.w_mean
267
268        if not return_std and not return_cov:
269            return y_pred
270
271        if self.method != "bayesian":
272            raise ValueError(
273                "Uncertainty quantification only available for Bayesian method"
274            )
275
276        # Predictive variance
277        if return_cov:
278            # Full predictive covariance
279            # Σ_pred = (1/β) * I + Φ @ S_N @ Φᵀ
280            pred_cov = (1.0 / self.beta) * jnp.eye(
281                Phi.shape[0]
282            ) + Phi @ self.S_N @ Phi.T
283            return y_pred, pred_cov
284        else:
285            # Diagonal of predictive covariance (standard deviations)
286            # σ²_pred = (1/β) + diag(Φ @ S_N @ Φᵀ)
287            var_diag = (1.0 / self.beta) + jnp.sum(
288                (Phi @ self.S_N) * Phi, axis=1
289            )
290            y_std = jnp.sqrt(jnp.maximum(var_diag, 0.0)).reshape(-1, 1)
291            return y_pred, y_std

Make predictions, optionally with uncertainty quantification.

Parameters:

X : array-like, shape (n_samples, n_features) Input data return_std : bool If True, return standard deviation of predictive distribution return_cov : bool If True, return full covariance matrix of predictive distribution

Returns:

y_pred : jnp.ndarray Predictive mean y_std or y_cov : jnp.ndarray, optional Predictive standard deviation or covariance

class RandomFourierFeaturesRidgeGCV(nnetsauce.RandomFourierFeaturesRidge):
class RandomFourierFeaturesRidgeGCV(RandomFourierFeaturesRidge):
    """
    Extends RandomFourierFeaturesRidge with GCV for automatic
    regularization parameter selection.

    Attributes:
    -----------
    alpha_opt : float or None
        Regularization parameter selected by GCV (None before a GCV fit)
    gcv_score : float or None
        GCV score attained at `alpha_opt` (None before a GCV fit)
    """

    def __init__(
        self,
        n_features: int = 100,
        gamma: float = 1.0,
        alpha: Optional[float] = None,
        include_bias: bool = True,
        random_seed: int = 42,
    ):
        super().__init__(n_features, gamma, alpha, include_bias, random_seed)
        self.alpha_opt = None  # Stores the GCV-optimized alpha
        self.gcv_score = None  # Stores the optimal GCV score

    @staticmethod
    def _check_method(method):
        """Raise ValueError unless `method` is a supported fitting method."""
        if method not in ("standard", "bayesian"):
            raise ValueError("method must be 'standard' or 'bayesian'")

    def _prepare_gcv(self, X, y):
        """Draw random weights, build the feature matrix and take its thin SVD.

        Returns (y as a 2-D array, Phi, n_samples, input_dim, (U, S, Vt)).
        """
        X = jnp.asarray(X)
        y = jnp.asarray(y)

        if len(y.shape) == 1:
            y = y.reshape(-1, 1)

        n_samples, input_dim = X.shape

        # Initialize random Fourier weights and compute the features
        self.W, self.b = self._init_random_weights(input_dim)
        Phi = self._compute_random_features(X, self.W, self.b)

        # Φ = U @ diag(S) @ V.T
        svd_factors = jnp.linalg.svd(Phi, full_matrices=False)
        return y, Phi, n_samples, input_dim, svd_factors

    def _finalize_gcv_fit(self, Phi, y, method, input_dim, alpha_opt, gcv_opt):
        """Record the selected alpha and fit the final model with it."""
        self.alpha_opt = alpha_opt
        self.gcv_score = gcv_opt
        self.alpha = alpha_opt  # Set as the model's alpha

        # Keep the training features/targets, as the parent `fit` does, so
        # `log_marginal_likelihood` also works after a GCV fit.
        self.Phi_train = Phi
        self.y_train = y

        if method == "standard":
            self._fit_standard(Phi, y)
        else:
            # For the Bayesian version, alpha is used as prior precision
            self._fit_bayesian(Phi, y)

        self.is_fitted = True
        self.method = method
        self.input_dim = input_dim

    def _compute_gcv(
        self,
        alpha,
        s_sq,
        U,
        y,
        n_samples,
    ):
        """
        Compute GCV score for a given alpha.

        Parameters:
        -----------
        alpha : float
            Regularization parameter
        s_sq : jnp.ndarray
            Squared singular values of design matrix Φ
        U : jnp.ndarray
            Left singular vectors of Φ
        y : jnp.ndarray, shape (n_samples,) or (n_samples, n_targets)
            Target values
        n_samples : int
            Number of data points

        Returns:
        --------
        gcv : float
            GCV score for this alpha; +inf when the effective degrees of
            freedom equal `n_samples` (the GCV denominator vanishes)
        """
        # Per-component shrinkage factors σ_j²/(σ_j² + α)
        shrinkage = s_sq / (s_sq + alpha)

        # Degrees of freedom: df(α) = Σ(σ_j²/(σ_j² + α))
        df = jnp.sum(shrinkage)

        # Fitted values via the SVD: y_pred = U @ diag(shrinkage) @ (U.T @ y)
        Uty = U.T @ y
        if Uty.ndim > 1:
            # Row j of Uᵀy must be scaled by shrinkage[j]. A 1-D vector
            # times a (k, 1) column would broadcast to (k, k) instead.
            shrinkage = shrinkage.reshape(-1, 1)
        y_pred = U @ (shrinkage * Uty)
        rss = jnp.sum((y - y_pred) ** 2)

        # GCV formula; guard the denominator so an interpolating fit yields
        # +inf rather than NaN, which would corrupt the argmin over alphas.
        denom = (1.0 - df / n_samples) ** 2
        gcv = jnp.where(denom > 0, (rss / n_samples) / denom, jnp.inf)

        return float(gcv)

    def fit_gcv(
        self,
        X,
        y,
        alpha_range: Tuple[float, float] = (1e-8, 1e4),
        n_alphas: int = 50,
        method: str = "standard",
        optimize: bool = True,
    ) -> "RandomFourierFeaturesRidgeGCV":
        """
        Fit model with GCV-optimized regularization parameter.

        Parameters:
        -----------
        X : array-like
            Training data
        y : array-like
            Target values
        alpha_range : tuple
            (min_alpha, max_alpha) range to search
        n_alphas : int
            Number of alpha values to try in initial grid search
        method : str
            "standard" or "bayesian"
        optimize : bool
            If True, perform fine optimization after grid search. The
            refined value is kept only when it improves on the grid optimum.

        Returns:
        --------
        self : fitted model

        Raises:
        -------
        ValueError
            If `method` is neither "standard" nor "bayesian"
        """
        self._check_method(method)

        y, Phi, n_samples, input_dim, (U, S, _) = self._prepare_gcv(X, y)
        s_sq = S**2  # Squared singular values

        # Grid search on log scale
        log_lo = float(jnp.log10(alpha_range[0]))
        log_hi = float(jnp.log10(alpha_range[1]))
        alphas_grid = jnp.logspace(log_lo, log_hi, n_alphas)

        gcv_scores = [
            self._compute_gcv(float(alpha), s_sq, U, y, n_samples)
            for alpha in alphas_grid
        ]

        # Best alpha from the grid is the baseline
        best_idx = int(jnp.argmin(jnp.array(gcv_scores)))
        alpha_opt = float(alphas_grid[best_idx])
        gcv_opt = gcv_scores[best_idx]

        if optimize:
            # Objective for the scipy optimizer, in log10(alpha) space
            def gcv_objective(log_alpha):
                return self._compute_gcv(
                    10.0**log_alpha, s_sq, U, y, n_samples
                )

            result = minimize_scalar(
                gcv_objective,
                bounds=(log_lo, log_hi),
                method="bounded",
                options={"xatol": 0.1},  # Tolerance in log10 space
            )

            # The bounded search can stop in a worse local minimum than the
            # grid found, so only accept a genuine improvement.
            if result.success and result.fun < gcv_opt:
                alpha_opt = float(10.0**result.x)
                gcv_opt = float(result.fun)

        self._finalize_gcv_fit(Phi, y, method, input_dim, alpha_opt, gcv_opt)

        return self

    def fit_gcv_with_path(
        self,
        X,
        y,
        alpha_range: Tuple[float, float] = (1e-8, 1e4),
        n_alphas: int = 100,
        method: str = "standard",
    ) -> dict:
        """
        Fit with GCV and return full regularization path.

        Parameters:
        -----------
        X : array-like
            Training data
        y : array-like
            Target values
        alpha_range : tuple
            (min_alpha, max_alpha) range to search
        n_alphas : int
            Number of alpha values on the log-spaced grid
        method : str
            "standard" or "bayesian"

        Returns:
        --------
        path_info : dict
            Dictionary with alpha values, GCV scores, and metrics

        Raises:
        -------
        ValueError
            If `method` is neither "standard" nor "bayesian"
        """
        # Reject an unknown method instead of returning an unfitted model
        # that is flagged as fitted.
        self._check_method(method)

        y, Phi, n_samples, input_dim, (U, S, Vt) = self._prepare_gcv(X, y)
        s_sq = S**2

        # Compute GCV path
        alphas = jnp.logspace(
            jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1]), n_alphas
        )

        gcv_scores = []
        train_errors = []
        effective_dof = []

        Uty = U.T @ y  # does not depend on alpha

        for alpha in alphas:
            alpha_val = float(alpha)

            # GCV score
            gcv_scores.append(
                self._compute_gcv(alpha_val, s_sq, U, y, n_samples)
            )

            # Effective degrees of freedom
            effective_dof.append(float(jnp.sum(s_sq / (s_sq + alpha_val))))

            # Training error for this alpha, with ridge weights
            # w = V @ diag(S/(S² + α)) @ (U.T @ y)
            shrinkage = S / (s_sq + alpha_val)
            w_alpha = Vt.T @ (shrinkage.reshape(-1, 1) * Uty)
            y_pred = Phi @ w_alpha
            train_errors.append(float(jnp.mean((y - y_pred) ** 2)))

        # Find optimal alpha
        best_idx = int(jnp.argmin(jnp.array(gcv_scores)))
        alpha_opt = float(alphas[best_idx])

        # Fit final model with optimal alpha
        self._finalize_gcv_fit(
            Phi, y, method, input_dim, alpha_opt, gcv_scores[best_idx]
        )

        # Return full path information
        path_info = {
            "alphas": np.array(alphas),
            "gcv_scores": np.array(gcv_scores),
            "train_errors": np.array(train_errors),
            "effective_dof": np.array(effective_dof),
            "alpha_opt": alpha_opt,
            "gcv_opt": gcv_scores[best_idx],
            "dof_opt": effective_dof[best_idx],
        }

        return path_info

    def plot_gcv_path(self, path_info: dict, save_path: Optional[str] = None):
        """
        Plot GCV regularization path.

        Parameters:
        -----------
        path_info : dict
            Dictionary as returned by `fit_gcv_with_path`
        save_path : str, optional
            If given, the figure is also written to this path
        """
        # Imported lazily so matplotlib is only needed for plotting
        import matplotlib.pyplot as plt

        fig, axes = plt.subplots(2, 2, figsize=(12, 10))

        # Plot 1: GCV score vs alpha
        ax = axes[0, 0]
        ax.semilogx(
            path_info["alphas"], path_info["gcv_scores"], "b-", linewidth=2
        )
        ax.axvline(
            path_info["alpha_opt"],
            color="r",
            linestyle="--",
            label=f'Optimal α = {path_info["alpha_opt"]:.2e}',
        )
        ax.set_xlabel("Regularization α")
        ax.set_ylabel("GCV Score")
        ax.set_title("GCV Score vs Regularization")
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 2: Training error vs alpha
        ax = axes[0, 1]
        ax.loglog(
            path_info["alphas"], path_info["train_errors"], "g-", linewidth=2
        )
        ax.axvline(path_info["alpha_opt"], color="r", linestyle="--")
        ax.set_xlabel("Regularization α")
        ax.set_ylabel("Training MSE")
        ax.set_title("Training Error vs Regularization")
        ax.grid(True, alpha=0.3)

        # Plot 3: Effective DOF vs alpha
        ax = axes[1, 0]
        ax.semilogx(
            path_info["alphas"], path_info["effective_dof"], "m-", linewidth=2
        )
        ax.axvline(path_info["alpha_opt"], color="r", linestyle="--")
        ax.axhline(
            path_info["dof_opt"],
            color="r",
            linestyle=":",
            label=f'DOF at optimum = {path_info["dof_opt"]:.1f}',
        )
        ax.set_xlabel("Regularization α")
        ax.set_ylabel("Effective Degrees of Freedom")
        ax.set_title("Model Complexity vs Regularization")
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 4: GCV vs DOF
        ax = axes[1, 1]
        ax.plot(
            path_info["effective_dof"],
            path_info["gcv_scores"],
            "k-",
            linewidth=2,
        )
        ax.axvline(path_info["dof_opt"], color="r", linestyle="--")
        ax.set_xlabel("Effective Degrees of Freedom")
        ax.set_ylabel("GCV Score")
        ax.set_title("GCV vs Model Complexity")
        ax.grid(True, alpha=0.3)

        plt.suptitle(
            "GCV Regularization Path Analysis", fontsize=14, fontweight="bold"
        )
        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=150, bbox_inches="tight")

        plt.show()

Extends RandomFourierFeaturesRidge with GCV for automatic regularization parameter selection.

class RegressorUpdater(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 16class RegressorUpdater(BaseEstimator, RegressorMixin):
 17    """
 18    Update a regression model with new observations
 19
 20    Parameters
 21    ----------
 22    regr: object
 23        A regression model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    def __init__(self, regr, alpha=0.5):
 39        self.regr = regr
 40        self.alpha = alpha
 41        self.n_obs_ = None
 42        self.coef_ = None
 43        self.updating_factor_ = None
 44        try:
 45            self.coef_ = self.regr.coef_
 46            if isinstance(self.regr, Base):
 47                self.n_obs_ = self.regr.scaler_.n_samples_seen_
 48        except AttributeError:
 49            pass
 50
 51    def fit(self, X, y, **kwargs):
 52        if isinstance(
 53            self.regr, CustomRegressor
 54        ):  # nnetsauce model not deep ---
 55            if check_is_fitted(self.regr) == False:
 56                self.regr.fit(X, y, **kwargs)
 57                self.n_obs_ = X.shape[0]
 58                if hasattr(self.regr, "coef_"):
 59                    self.coef_ = self.regr.coef_
 60                return self
 61            self.n_obs_ = self.regr.scaler_.n_samples_seen_
 62            if hasattr(self.regr, "coef_"):
 63                self.coef_ = self.regr.coef_
 64            return self
 65
 66        if (
 67            hasattr(self.regr, "coef_") == False
 68        ):  # sklearn model or CustomRegressor model ---
 69            self.regr.fit(X, y)
 70            self.n_obs_ = X.shape[0]
 71            self.regr.fit(X, y)
 72            if hasattr(self.regr, "stacked_obj"):
 73                self.coef_ = self.regr.stacked_obj.coef_
 74            else:
 75                self.coef_ = self.regr.coef_
 76            return self
 77        self.n_obs_ = X.shape[0]
 78        if hasattr(self.regr, "coef_"):
 79            self.coef_ = self.regr.coef_
 80        return self
 81
 82    def predict(self, X):
 83        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
 84        return self.regr.predict(X)
 85
 86    def partial_fit(self, X, y):
 87        assert hasattr(
 88            self.regr, "coef_"
 89        ), "model must be fitted first (i.e have 'coef_' attribute)"
 90        assert (
 91            self.n_obs_ is not None
 92        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
 93
 94        if len(X.shape) == 1:
 95            X = X.reshape(1, -1)
 96
 97        assert X.shape[0] == 1, "X must have one row"
 98
 99        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
100
101        if isinstance(self.regr, Base):  # nnetsauce model ---
102            newX = deepcopy(X)
103
104            if isinstance(
105                self.regr, CustomRegressor
106            ):  # other nnetsauce model (CustomRegressor) ---
107                newX = self.regr.cook_test_set(X=X)
108                if isinstance(X, pd.DataFrame):
109                    newx = newX.values.ravel()
110                else:
111                    newx = newX.ravel()
112
113        else:  # an sklearn model ---
114            if isinstance(X, pd.DataFrame):
115                newx = X.values.ravel()
116            else:
117                newx = X.ravel()
118
119        new_coef = self.regr.coef_ + self.updating_factor_ * np.dot(
120            newx, y - np.dot(newx, self.regr.coef_)
121        )
122        self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef)
123        self.coef_ = deepcopy(self.regr.coef_)
124        self.n_obs_ += 1
125        return self

Update a regression model with new observations

Parameters

regr: object A regression model with a coef_ attribute alpha: float Updating factor's exponent

Attributes

n_obs_: int Number of observations coef_: np.ndarray Coefficients of the model updating_factor_: float Updating factor

def fit(self, X, y, **kwargs):
51    def fit(self, X, y, **kwargs):
52        if isinstance(
53            self.regr, CustomRegressor
54        ):  # nnetsauce model not deep ---
55            if check_is_fitted(self.regr) == False:
56                self.regr.fit(X, y, **kwargs)
57                self.n_obs_ = X.shape[0]
58                if hasattr(self.regr, "coef_"):
59                    self.coef_ = self.regr.coef_
60                return self
61            self.n_obs_ = self.regr.scaler_.n_samples_seen_
62            if hasattr(self.regr, "coef_"):
63                self.coef_ = self.regr.coef_
64            return self
65
66        if (
67            hasattr(self.regr, "coef_") == False
68        ):  # sklearn model or CustomRegressor model ---
69            self.regr.fit(X, y)
70            self.n_obs_ = X.shape[0]
71            self.regr.fit(X, y)
72            if hasattr(self.regr, "stacked_obj"):
73                self.coef_ = self.regr.stacked_obj.coef_
74            else:
75                self.coef_ = self.regr.coef_
76            return self
77        self.n_obs_ = X.shape[0]
78        if hasattr(self.regr, "coef_"):
79            self.coef_ = self.regr.coef_
80        return self
def predict(self, X):
82    def predict(self, X):
83        # assert hasattr(self.regr, "coef_"), "model must have coef_ attribute"
84        return self.regr.predict(X)
class ClassifierUpdater(sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin):
 16class ClassifierUpdater(BaseEstimator, ClassifierMixin):
 17    """
 18    Update a regression model with new observations
 19
 20    Parameters
 21    ----------
 22    clf: object
 23        A regression model with a coef_ attribute
 24    alpha: float
 25        Updating factor's exponent
 26
 27    Attributes
 28    ----------
 29    n_obs_: int
 30        Number of observations
 31    coef_: np.ndarray
 32        Coefficients of the model
 33    updating_factor_: float
 34        Updating factor
 35
 36    """
 37
 38    _estimator_type = "classifier"
 39
 40    def __init__(self, clf, alpha=0.5):
 41        self.clf = clf
 42        self.alpha = alpha
 43        self.n_obs_ = None
 44        self.coef_ = None
 45        self.updating_factor_ = None
 46        try:
 47            self.coef_ = self.clf.coef_
 48            if isinstance(self.clf, Base):
 49                self.n_obs_ = self.clf.scaler_.n_samples_seen_
 50        except AttributeError:
 51            pass
 52
 53    def fit(self, X, y, **kwargs):
 54        raise NotImplementedError(
 55            "fit method is not implemented for ClassifierUpdater"
 56        )
 57
 58        if isinstance(
 59            self.clf, CustomClassifier
 60        ):  # nnetsauce model not deep ---
 61            if check_is_fitted(self.clf) == False:
 62                self.clf.fit(X, y, **kwargs)
 63                self.n_obs_ = X.shape[0]
 64                if hasattr(self.clf, "coef_"):
 65                    self.coef_ = self.clf.coef_
 66                return self
 67            self.n_obs_ = self.clf.scaler_.n_samples_seen_
 68            if hasattr(self.clf, "coef_"):
 69                self.coef_ = self.clf.coef_
 70            return self
 71
 72        if (
 73            hasattr(self.clf, "coef_") == False
 74        ):  # sklearn model or CustomClassifier model ---
 75            self.clf.fit(X, y)
 76            self.n_obs_ = X.shape[0]
 77            self.clf.fit(X, y)
 78            if hasattr(self.clf, "stacked_obj"):
 79                self.coef_ = self.clf.stacked_obj.coef_
 80            else:
 81                self.coef_ = self.clf.coef_
 82            return self
 83        self.n_obs_ = X.shape[0]
 84        if hasattr(self.clf, "coef_"):
 85            self.coef_ = self.clf.coef_
 86        return self
 87
 88    def predict(self, X):
 89        raise NotImplementedError(
 90            "predict method is not implemented for ClassifierUpdater"
 91        )
 92        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
 93        return self.clf.predict(X)
 94
 95    def partial_fit(self, X, y):
 96        raise NotImplementedError(
 97            "partial_fit method is not implemented for ClassifierUpdater"
 98        )
 99
100        assert hasattr(
101            self.clf, "coef_"
102        ), "model must be fitted first (i.e have 'coef_' attribute)"
103        assert (
104            self.n_obs_ is not None
105        ), "model must be fitted first (i.e have 'n_obs_' attribute)"
106
107        if len(X.shape) == 1:
108            X = X.reshape(1, -1)
109
110        assert X.shape[0] == 1, "X must have one row"
111
112        self.updating_factor_ = self.n_obs_ ** (-self.alpha)
113
114        if isinstance(self.clf, Base):  # nnetsauce model ---
115            newX = deepcopy(X)
116
117            if isinstance(
118                self.clf, CustomClassifier
119            ):  # other nnetsauce model (CustomClassifier) ---
120                newX = self.clf.cook_test_set(X=X)
121                if isinstance(X, pd.DataFrame):
122                    newx = newX.values.ravel()
123                else:
124                    newx = newX.ravel()
125
126        else:  # an sklearn model ---
127            if isinstance(X, pd.DataFrame):
128                newx = X.values.ravel()
129            else:
130                newx = X.ravel()
131
132        new_coef = self.clf.coef_ + self.updating_factor_ * np.dot(
133            newx, y - np.dot(newx, self.clf.coef_)
134        )
135        self.clf.coef_ = _update_mean(self.clf.coef_, self.n_obs_, new_coef)
136        self.coef_ = deepcopy(self.clf.coef_)
137        self.n_obs_ += 1
138        return self

Update a classification model with new observations

Parameters

clf: object A classification model with a coef_ attribute alpha: float Updating factor's exponent

Attributes

n_obs_: int Number of observations coef_: np.ndarray Coefficients of the model updating_factor_: float Updating factor

def fit(self, X, y, **kwargs):
53    def fit(self, X, y, **kwargs):
54        raise NotImplementedError(
55            "fit method is not implemented for ClassifierUpdater"
56        )
57
58        if isinstance(
59            self.clf, CustomClassifier
60        ):  # nnetsauce model not deep ---
61            if check_is_fitted(self.clf) == False:
62                self.clf.fit(X, y, **kwargs)
63                self.n_obs_ = X.shape[0]
64                if hasattr(self.clf, "coef_"):
65                    self.coef_ = self.clf.coef_
66                return self
67            self.n_obs_ = self.clf.scaler_.n_samples_seen_
68            if hasattr(self.clf, "coef_"):
69                self.coef_ = self.clf.coef_
70            return self
71
72        if (
73            hasattr(self.clf, "coef_") == False
74        ):  # sklearn model or CustomClassifier model ---
75            self.clf.fit(X, y)
76            self.n_obs_ = X.shape[0]
77            self.clf.fit(X, y)
78            if hasattr(self.clf, "stacked_obj"):
79                self.coef_ = self.clf.stacked_obj.coef_
80            else:
81                self.coef_ = self.clf.coef_
82            return self
83        self.n_obs_ = X.shape[0]
84        if hasattr(self.clf, "coef_"):
85            self.coef_ = self.clf.coef_
86        return self
def predict(self, X):
88    def predict(self, X):
89        raise NotImplementedError(
90            "predict method is not implemented for ClassifierUpdater"
91        )
92        # assert hasattr(self.clf, "coef_"), "model must have coef_ attribute"
93        return self.clf.predict(X)
class RidgeRegressor(sklearn.base.BaseEstimator, sklearn.base.RegressorMixin):
 26class RidgeRegressor(BaseEstimator, RegressorMixin):
 27    """Ridge.
 28
 29    Attributes:
 30
 31        reg_lambda: float
 32            regularization parameter.
 33
 34        backend: str
 35            type of backend; must be in ('cpu', 'gpu', 'tpu')
 36
 37    """
 38
 39    def __init__(self, reg_lambda=0.1, backend="cpu"):
 40        assert backend in (
 41            "cpu",
 42            "gpu",
 43            "tpu",
 44        ), "`backend` must be in ('cpu', 'gpu', 'tpu')"
 45
 46        if not JAX_AVAILABLE and backend != "cpu":
 47            raise RuntimeError(
 48                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
 49            )
 50
 51        sys_platform = platform.system()
 52
 53        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
 54            warnings.warn(
 55                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
 56            )
 57            backend = "cpu"
 58
 59        self.reg_lambda = reg_lambda
 60        self.backend = backend
 61        self.coef_ = None
 62
 63    def fit(self, X, y, **kwargs):
 64        """Fit matrixops (classifier) to training data (X, y)
 65
 66        Args:
 67
 68            X: {array-like}, shape = [n_samples, n_features]
 69                Training vectors, where n_samples is the number
 70                of samples and n_features is the number of features.
 71
 72            y: array-like, shape = [n_samples]
 73                Target values.
 74
 75            **kwargs: additional parameters to be passed to self.cook_training_set.
 76
 77        Returns:
 78
 79            self: object.
 80
 81        """
 82        self.ym, centered_y = mo.center_response(y)
 83        self.xm = X.mean(axis=0)
 84        self.xsd = X.std(axis=0)
 85        self.xsd[self.xsd == 0] = 1  # avoid division by zero
 86        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
 87
 88        if self.backend == "cpu":
 89            if len(centered_y.shape) <= 1:
 90                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 91                X_ = np.row_stack((X_, eye_term))
 92                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
 93                beta_info = get_beta(X_, y_)
 94                self.coef_ = beta_info[0]
 95            else:
 96                try:
 97                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 98                    X_ = np.row_stack((X_, eye_term))
 99                    y_ = np.row_stack(
100                        (
101                            centered_y,
102                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
103                        )
104                    )
105                    beta_info = get_beta(X_, y_)
106                    self.coef_ = beta_info[0]
107                except Exception:
108                    x = inv(
109                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
110                    )
111                    hat_matrix = mo.tcrossprod(x, X_)
112                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
113            return self
114
115        x = jinv(
116            mo.crossprod(X_, backend=self.backend)
117            + self.reg_lambda * jnp.eye(X_.shape[1])
118        )
119
120        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
121        self.coef_ = mo.safe_sparse_dot(
122            hat_matrix, centered_y, backend=self.backend
123        )
124        return self
125
126    def predict(self, X, **kwargs):
127        """Predict test data X.
128
129        Args:
130
131            X: {array-like}, shape = [n_samples, n_features]
132                Training vectors, where n_samples is the number
133                of samples and n_features is the number of features.
134
135            **kwargs: additional parameters to be passed to `predict_proba`
136
137        Returns:
138
139            model predictions: {array-like}
140
141        """
142        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
143
144        if self.backend == "cpu":
145            if isinstance(self.ym, float):
146                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
147            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)
148
149        # if self.backend in ("gpu", "tpu"):
150        if isinstance(self.ym, float):
151            return self.ym + mo.safe_sparse_dot(
152                X_, self.coef_, backend=self.backend
153            )
154        return self.ym[None, :] + mo.safe_sparse_dot(
155            X_, self.coef_, backend=self.backend
156        )

Ridge.

Attributes:

reg_lambda: float
    regularization parameter.

backend: str
    type of backend; must be in ('cpu', 'gpu', 'tpu')
def fit(self, X, y, **kwargs):
 63    def fit(self, X, y, **kwargs):
 64        """Fit matrixops (classifier) to training data (X, y)
 65
 66        Args:
 67
 68            X: {array-like}, shape = [n_samples, n_features]
 69                Training vectors, where n_samples is the number
 70                of samples and n_features is the number of features.
 71
 72            y: array-like, shape = [n_samples]
 73                Target values.
 74
 75            **kwargs: additional parameters to be passed to self.cook_training_set.
 76
 77        Returns:
 78
 79            self: object.
 80
 81        """
 82        self.ym, centered_y = mo.center_response(y)
 83        self.xm = X.mean(axis=0)
 84        self.xsd = X.std(axis=0)
 85        self.xsd[self.xsd == 0] = 1  # avoid division by zero
 86        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
 87
 88        if self.backend == "cpu":
 89            if len(centered_y.shape) <= 1:
 90                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 91                X_ = np.row_stack((X_, eye_term))
 92                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
 93                beta_info = get_beta(X_, y_)
 94                self.coef_ = beta_info[0]
 95            else:
 96                try:
 97                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
 98                    X_ = np.row_stack((X_, eye_term))
 99                    y_ = np.row_stack(
100                        (
101                            centered_y,
102                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
103                        )
104                    )
105                    beta_info = get_beta(X_, y_)
106                    self.coef_ = beta_info[0]
107                except Exception:
108                    x = inv(
109                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
110                    )
111                    hat_matrix = mo.tcrossprod(x, X_)
112                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
113            return self
114
115        x = jinv(
116            mo.crossprod(X_, backend=self.backend)
117            + self.reg_lambda * jnp.eye(X_.shape[1])
118        )
119
120        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
121        self.coef_ = mo.safe_sparse_dot(
122            hat_matrix, centered_y, backend=self.backend
123        )
124        return self

Fit the Ridge regression model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to self.cook_training_set.

Returns:

self: object.
def predict(self, X, **kwargs):
126    def predict(self, X, **kwargs):
127        """Predict test data X.
128
129        Args:
130
131            X: {array-like}, shape = [n_samples, n_features]
132                Training vectors, where n_samples is the number
133                of samples and n_features is the number of features.
134
135            **kwargs: additional parameters to be passed to `predict_proba`
136
137        Returns:
138
139            model predictions: {array-like}
140
141        """
142        X_ = (X - self.xm[None, :]) / self.xsd[None, :]
143
144        if self.backend == "cpu":
145            if isinstance(self.ym, float):
146                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
147            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)
148
149        # if self.backend in ("gpu", "tpu"):
150        if isinstance(self.ym, float):
151            return self.ym + mo.safe_sparse_dot(
152                X_, self.coef_, backend=self.backend
153            )
154        return self.ym[None, :] + mo.safe_sparse_dot(
155            X_, self.coef_, backend=self.backend
156        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional keyword arguments; not used by this method.

Returns:

model predictions: {array-like}
class Ridge2Regressor(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.RegressorMixin):
 23class Ridge2Regressor(Ridge2, RegressorMixin):
 24    """Ridge regression with 2 regularization parameters derived from class Ridge
 25
 26    Parameters:
 27
 28        n_hidden_features: int
 29            number of nodes in the hidden layer
 30
 31        activation_name: str
 32            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 33
 34        a: float
 35            hyperparameter for 'prelu' or 'elu' activation function
 36
 37        nodes_sim: str
 38            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 39            'uniform'
 40
 41        bias: boolean
 42            indicates if the hidden layer contains a bias term (True) or not
 43            (False)
 44
 45        dropout: float
 46            regularization parameter; (random) percentage of nodes dropped out
 47            of the training
 48
 49        n_clusters: int
 50            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 51                no clustering)
 52
 53        cluster_encode: bool
 54            defines how the variable containing clusters is treated (default is one-hot)
 55            if `False`, then labels are used, without one-hot encoding
 56
 57        type_clust: str
 58            type of clustering method: currently k-means ('kmeans') or Gaussian
 59            Mixture Model ('gmm')
 60
 61        type_scaling: a tuple of 3 strings
 62            scaling methods for inputs, hidden layer, and clustering respectively
 63            (and when relevant).
 64            Currently available: standardization ('std') or MinMax scaling ('minmax')
 65
 66        lambda1: float
 67            regularization parameter on direct link
 68
 69        lambda2: float
 70            regularization parameter on hidden layer
 71
 72        seed: int
 73            reproducibility seed for nodes_sim=='uniform'
 74
 75        backend: str
 76            'cpu' or 'gpu' or 'tpu'
 77
 78    Attributes:
 79
 80        beta_: {array-like}
 81            regression coefficients
 82
 83        coef_: {array-like}
 84            alias for `beta_`, regression coefficients
 85
 86        y_mean_: float
 87            average response
 88
 89    """
 90
 91    # construct the object -----
 92
 93    def __init__(
 94        self,
 95        n_hidden_features=5,
 96        activation_name="relu",
 97        a=0.01,
 98        nodes_sim="sobol",
 99        bias=True,
100        dropout=0,
101        n_clusters=2,
102        cluster_encode=True,
103        type_clust="kmeans",
104        type_scaling=("std", "std", "std"),
105        lambda1=0.1,
106        lambda2=0.1,
107        seed=123,
108        backend="cpu",
109    ):
110        super().__init__(
111            n_hidden_features=n_hidden_features,
112            activation_name=activation_name,
113            a=a,
114            nodes_sim=nodes_sim,
115            bias=bias,
116            dropout=dropout,
117            n_clusters=n_clusters,
118            cluster_encode=cluster_encode,
119            type_clust=type_clust,
120            type_scaling=type_scaling,
121            lambda1=lambda1,
122            lambda2=lambda2,
123            seed=seed,
124            backend=backend,
125        )
126
127        self.type_fit = "regression"
128        self.coef_ = None
129
130    def fit(self, X, y, **kwargs):
131        """Fit Ridge model to training data (X, y).
132
133        Args:
134
135            X: {array-like}, shape = [n_samples, n_features]
136                Training vectors, where n_samples is the number
137                of samples and n_features is the number of features.
138
139            y: array-like, shape = [n_samples]
140                Target values.
141
142            **kwargs: additional parameters to be passed to
143                    self.cook_training_set or self.obj.fit
144
145        Returns:
146
147            self: object
148
149        """
150
151        sys_platform = platform.system()
152
153        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
154
155        n_X, p_X = X.shape
156        n_Z, p_Z = scaled_Z.shape
157
158        if self.n_clusters > 0:
159            if self.encode_clusters == True:
160                n_features = p_X + self.n_clusters
161            else:
162                n_features = p_X + 1
163        else:
164            n_features = p_X
165
166        X_ = scaled_Z[:, 0:n_features]
167        Phi_X_ = scaled_Z[:, n_features:p_Z]
168
169        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
170            np.repeat(1, n_features)
171        )
172        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
173        D = mo.crossprod(
174            x=Phi_X_, backend=self.backend
175        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
176
177        if sys_platform in ("Linux", "Darwin"):
178            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
179        else:
180            B_inv = pinv(B)
181
182        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
183        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
184
185        if sys_platform in ("Linux", "Darwin"):
186            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
187        else:
188            S_inv = pinv(S_mat)
189
190        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
191        inv = mo.rbind(
192            mo.cbind(
193                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
194                y=-np.transpose(Y),
195                backend=self.backend,
196            ),
197            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
198            backend=self.backend,
199        )
200
201        self.beta_ = mo.safe_sparse_dot(
202            a=inv,
203            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
204            backend=self.backend,
205        )
206
207        self.coef_ = self.beta_  # sklearn compatibility
208
209        return self
210
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228
229        if len(X.shape) == 1:
230            n_features = X.shape[0]
231            new_X = mo.rbind(
232                x=X.reshape(1, n_features),
233                y=np.ones(n_features).reshape(1, n_features),
234                backend=self.backend,
235            )
236
237            return (
238                self.y_mean_
239                + mo.safe_sparse_dot(
240                    a=self.cook_test_set(new_X, **kwargs),
241                    b=self.beta_,
242                    backend=self.backend,
243                )
244            )[0]
245
246        return self.y_mean_ + mo.safe_sparse_dot(
247            a=self.cook_test_set(X, **kwargs),
248            b=self.beta_,
249            backend=self.backend,
250        )
251
252    def partial_fit(self, X, y, learning_rate=0.01, decay=0.001, **kwargs):
253        """Incrementally fit the Ridge model using SGD-style updates.
254
255        Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n
256        for online learning with individual samples.
257
258        Args:
259            X: {array-like}, shape = [n_samples, n_features]
260                Training vectors for this batch
261
262            y: array-like, shape = [n_samples]
263                Target values for this batch
264
265            learning_rate: float, default=0.01
266                Initial learning rate for SGD updates
267
268            decay: float, default=0.001
269                Learning rate decay parameter
270
271            **kwargs: additional parameters to be passed to self.cook_training_set
272
273        Returns:
274            self: object
275        """
276
277        # Input validation
278        X = np.asarray(X)
279        y = np.asarray(y)
280
281        if X.shape[0] != y.shape[0]:
282            raise ValueError("X and y must have the same number of samples")
283
284        # Handle first call
285        if not self._is_fitted:
286            # Initialize learning parameters
287            self.initial_learning_rate = learning_rate
288            self.decay = decay
289            self._step_count = 0
290            self._is_fitted = True
291
292        # Process the batch
293        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
294
295        # Get dimensions
296        n_samples, n_features_total = scaled_Z.shape
297        n_original_features = X.shape[1]
298
299        # Determine feature dimensions for regularization
300        if self.n_clusters > 0:
301            if self.cluster_encode:
302                n_direct_features = n_original_features + self.n_clusters
303            else:
304                n_direct_features = n_original_features + 1
305        else:
306            n_direct_features = n_original_features
307
308        # Initialize beta_ if first time
309        if not hasattr(self, "beta_") or self.beta_ is None:
310            # For regression, beta_ is 1D (single output)
311            self.beta_ = np.zeros(n_features_total)
312
313        # Precompute indices for regularization
314        direct_indices = slice(0, n_direct_features)
315        hidden_indices = slice(n_direct_features, n_features_total)
316
317        # Process each sample with SGD
318        for i in range(n_samples):
319            self._step_count += 1
320
321            # Current learning rate with decay
322            current_lr = self.initial_learning_rate / (
323                1 + self.decay * self._step_count
324            )
325
326            # Current sample and target
327            x_i = scaled_Z[i, :]  # Feature vector
328            y_i = (
329                centered_y[i] if centered_y.ndim == 1 else centered_y[i, 0]
330            )  # Scalar target
331
332            # Prediction: x_i^T * beta
333            prediction = x_i @ self.beta_
334
335            # Error: y_i - prediction
336            error = y_i - prediction
337
338            # Gradient update: current_lr * x_i * error
339            gradient_update = current_lr * x_i * error
340
341            # Regularization terms (more efficient indexing)
342            reg_update = np.zeros_like(self.beta_)
343            reg_update[direct_indices] = (
344                current_lr * self.lambda1 * self.beta_[direct_indices]
345            )
346            reg_update[hidden_indices] = (
347                current_lr * self.lambda2 * self.beta_[hidden_indices]
348            )
349
350            # Combined update: beta = beta + gradient_update - reg_update
351            self.beta_ += gradient_update - reg_update
352
353        self.coef_ = self.beta_  # sklearn compatibility
354
355        return self

Ridge regression with 2 regularization parameters, derived from class Ridge2

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    'cpu' or 'gpu' or 'tpu'

Attributes:

beta_: {array-like}
    regression coefficients

coef_: {array-like}
    alias for `beta_`, regression coefficients

y_mean_: float
    average response
def fit(self, X, y, **kwargs):
130    def fit(self, X, y, **kwargs):
131        """Fit Ridge model to training data (X, y).
132
133        Args:
134
135            X: {array-like}, shape = [n_samples, n_features]
136                Training vectors, where n_samples is the number
137                of samples and n_features is the number of features.
138
139            y: array-like, shape = [n_samples]
140                Target values.
141
142            **kwargs: additional parameters to be passed to
143                    self.cook_training_set or self.obj.fit
144
145        Returns:
146
147            self: object
148
149        """
150
151        sys_platform = platform.system()
152
153        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
154
155        n_X, p_X = X.shape
156        n_Z, p_Z = scaled_Z.shape
157
158        if self.n_clusters > 0:
159            if self.encode_clusters == True:
160                n_features = p_X + self.n_clusters
161            else:
162                n_features = p_X + 1
163        else:
164            n_features = p_X
165
166        X_ = scaled_Z[:, 0:n_features]
167        Phi_X_ = scaled_Z[:, n_features:p_Z]
168
169        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
170            np.repeat(1, n_features)
171        )
172        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
173        D = mo.crossprod(
174            x=Phi_X_, backend=self.backend
175        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
176
177        if sys_platform in ("Linux", "Darwin"):
178            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
179        else:
180            B_inv = pinv(B)
181
182        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
183        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
184
185        if sys_platform in ("Linux", "Darwin"):
186            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
187        else:
188            S_inv = pinv(S_mat)
189
190        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
191        inv = mo.rbind(
192            mo.cbind(
193                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
194                y=-np.transpose(Y),
195                backend=self.backend,
196            ),
197            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
198            backend=self.backend,
199        )
200
201        self.beta_ = mo.safe_sparse_dot(
202            a=inv,
203            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
204            backend=self.backend,
205        )
206
207        self.coef_ = self.beta_  # sklearn compatibility
208
209        return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
211    def predict(self, X, **kwargs):
212        """Predict test data X.
213
214        Args:
215
216            X: {array-like}, shape = [n_samples, n_features]
217                Training vectors, where n_samples is the number
218                of samples and n_features is the number of features.
219
220            **kwargs: additional parameters to be passed to
221                    self.cook_test_set
222
223        Returns:
224
225            model predictions: {array-like}
226
227        """
228
229        if len(X.shape) == 1:
230            n_features = X.shape[0]
231            new_X = mo.rbind(
232                x=X.reshape(1, n_features),
233                y=np.ones(n_features).reshape(1, n_features),
234                backend=self.backend,
235            )
236
237            return (
238                self.y_mean_
239                + mo.safe_sparse_dot(
240                    a=self.cook_test_set(new_X, **kwargs),
241                    b=self.beta_,
242                    backend=self.backend,
243                )
244            )[0]
245
246        return self.y_mean_ + mo.safe_sparse_dot(
247            a=self.cook_test_set(X, **kwargs),
248            b=self.beta_,
249            backend=self.backend,
250        )

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
class Ridge2MultiOutputRegressor(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.RegressorMixin):
 28class Ridge2MultiOutputRegressor(Ridge2, RegressorMixin):
 29    """Ridge regression with 2 regularization parameters for multiple outputs (zero-loop, JAX-optimized)
 30
 31    Parameters:
 32
 33        n_hidden_features: int
 34            number of nodes in the hidden layer
 35
 36        activation_name: str
 37            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 38
 39        a: float
 40            hyperparameter for 'prelu' or 'elu' activation function
 41
 42        nodes_sim: str
 43            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 44            'uniform'
 45
 46        bias: boolean
 47            indicates if the hidden layer contains a bias term (True) or not
 48            (False)
 49
 50        dropout: float
 51            regularization parameter; (random) percentage of nodes dropped out
 52            of the training
 53
 54        n_clusters: int
 55            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 56                no clustering)
 57
 58        cluster_encode: bool
 59            defines how the variable containing clusters is treated (default is one-hot)
 60            if `False`, then labels are used, without one-hot encoding
 61
 62        type_clust: str
 63            type of clustering method: currently k-means ('kmeans') or Gaussian
 64            Mixture Model ('gmm')
 65
 66        type_scaling: a tuple of 3 strings
 67            scaling methods for inputs, hidden layer, and clustering respectively
 68            (and when relevant).
 69            Currently available: standardization ('std') or MinMax scaling ('minmax')
 70
 71        lambda1: float
 72            regularization parameter on direct link
 73
 74        lambda2: float
 75            regularization parameter on hidden layer
 76
 77        seed: int
 78            reproducibility seed for nodes_sim=='uniform'
 79
 80        backend: str
 81            'cpu' or 'gpu' or 'tpu'
 82
 83    Attributes:
 84
 85        beta_: {array-like}, shape = [n_features, n_outputs]
 86            regression coefficients
 87
 88        coef_: {array-like}
 89            alias for `beta_`, regression coefficients
 90
 91        y_mean_: array-like, shape = [n_outputs]
 92            average response for each output
 93
 94    """
 95
 96    def __init__(
 97        self,
 98        n_hidden_features=5,
 99        activation_name="relu",
100        a=0.01,
101        nodes_sim="sobol",
102        bias=True,
103        dropout=0,
104        n_clusters=2,
105        cluster_encode=True,
106        type_clust="kmeans",
107        type_scaling=("std", "std", "std"),
108        lambda1=0.1,
109        lambda2=0.1,
110        seed=123,
111        backend="cpu",
112    ):
113        if not JAX_AVAILABLE and backend != "cpu":
114            raise RuntimeError(
115                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
116            )
117
118        super().__init__(
119            n_hidden_features=n_hidden_features,
120            activation_name=activation_name,
121            a=a,
122            nodes_sim=nodes_sim,
123            bias=bias,
124            dropout=dropout,
125            n_clusters=n_clusters,
126            cluster_encode=cluster_encode,
127            type_clust=type_clust,
128            type_scaling=type_scaling,
129            lambda1=lambda1,
130            lambda2=lambda2,
131            seed=seed,
132            backend=backend,
133        )
134
135        self.type_fit = "regression"
136        self.coef_ = None
137        self.use_jax = JAX_AVAILABLE and backend in ("gpu", "tpu")
138
139    def fit(self, X, y, **kwargs):
140        """Fit Ridge model to training data (X, y) with multiple outputs.
141
142        Args:
143
144            X: {array-like}, shape = [n_samples, n_features]
145                Training vectors, where n_samples is the number
146                of samples and n_features is the number of features.
147
148            y: array-like, shape = [n_samples] or [n_samples, n_outputs]
149                Target values. Can be 1D for single output or 2D for multiple outputs.
150
151            **kwargs: additional parameters to be passed to
152                    self.cook_training_set or self.obj.fit
153
154        Returns:
155
156            self: object
157
158        """
159
160        sys_platform = platform.system()
161
162        # Ensure y is 2D
163        y = np.atleast_2d(y)
164        if y.shape[0] == 1 and y.shape[1] > 1:
165            y = y.T
166
167        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
168
169        n_X, p_X = X.shape
170        n_Z, p_Z = scaled_Z.shape
171        n_outputs = centered_y.shape[1] if centered_y.ndim > 1 else 1
172
173        if self.n_clusters > 0:
174            if self.encode_clusters == True:
175                n_features = p_X + self.n_clusters
176            else:
177                n_features = p_X + 1
178        else:
179            n_features = p_X
180
181        X_ = scaled_Z[:, 0:n_features]
182        Phi_X_ = scaled_Z[:, n_features:p_Z]
183
184        # Use JAX if available and requested
185        if self.use_jax:
186            X_ = jnp.array(X_)
187            Phi_X_ = jnp.array(Phi_X_)
188            centered_y = jnp.array(centered_y)
189
190            # Compute all matrix operations with JAX
191            B = jnp.dot(X_.T, X_) + self.lambda1 * jnp.eye(n_features)
192            C = jnp.dot(Phi_X_.T, X_)
193            D = jnp.dot(Phi_X_.T, Phi_X_) + self.lambda2 * jnp.eye(
194                Phi_X_.shape[1]
195            )
196
197            B_inv = jpinv(B)
198            W = jnp.dot(C, B_inv)
199            S_mat = D - jnp.dot(W, C.T)
200            S_inv = jpinv(S_mat)
201            Y = jnp.dot(S_inv, W)
202
203            # Build inverse matrix
204            inv_upper = jnp.hstack([B_inv + jnp.dot(W.T, Y), -Y.T])
205            inv_lower = jnp.hstack([-Y, S_inv])
206            inv = jnp.vstack([inv_upper, inv_lower])
207
208            # Compute beta for all outputs at once (vectorized)
209            Z_T_y = jnp.dot(scaled_Z.T, centered_y)
210            self.beta_ = jnp.dot(inv, Z_T_y)
211
212            # Convert back to numpy
213            self.beta_ = np.array(self.beta_)
214        else:
215            # NumPy version
216            B = mo.crossprod(
217                x=X_, backend=self.backend
218            ) + self.lambda1 * np.diag(np.repeat(1, n_features))
219            C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
220            D = mo.crossprod(
221                x=Phi_X_, backend=self.backend
222            ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
223
224            if sys_platform in ("Linux", "Darwin"):
225                B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
226            else:
227                B_inv = pinv(B)
228
229            W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
230            S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
231
232            if sys_platform in ("Linux", "Darwin"):
233                S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
234            else:
235                S_inv = pinv(S_mat)
236
237            Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
238            inv = mo.rbind(
239                mo.cbind(
240                    x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
241                    y=-np.transpose(Y),
242                    backend=self.backend,
243                ),
244                mo.cbind(x=-Y, y=S_inv, backend=self.backend),
245                backend=self.backend,
246            )
247
248            # Vectorized multi-output computation (no loop)
249            Z_T_y = mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend)
250            self.beta_ = mo.safe_sparse_dot(
251                a=inv, b=Z_T_y, backend=self.backend
252            )
253
254        self.coef_ = self.beta_  # sklearn compatibility
255
256        return self
257
258    def predict(self, X, **kwargs):
259        """Predict test data X for all outputs.
260
261        Args:
262
263            X: {array-like}, shape = [n_samples, n_features]
264                Training vectors, where n_samples is the number
265                of samples and n_features is the number of features.
266
267            **kwargs: additional parameters to be passed to
268                    self.cook_test_set
269
270        Returns:
271
272            model predictions: {array-like}, shape = [n_samples, n_outputs]
273
274        """
275
276        if len(X.shape) == 1:
277            n_features = X.shape[0]
278            new_X = mo.rbind(
279                x=X.reshape(1, n_features),
280                y=np.ones(n_features).reshape(1, n_features),
281                backend=self.backend,
282            )
283
284            cooked = self.cook_test_set(new_X, **kwargs)
285
286            if self.use_jax:
287                cooked = jnp.array(cooked)
288                predictions = self.y_mean_ + jnp.dot(cooked, self.beta_)
289                return np.array(predictions[0])
290            else:
291                return (
292                    self.y_mean_
293                    + mo.safe_sparse_dot(
294                        a=cooked,
295                        b=self.beta_,
296                        backend=self.backend,
297                    )
298                )[0]
299
300        cooked = self.cook_test_set(X, **kwargs)
301
302        if self.use_jax:
303            cooked = jnp.array(cooked)
304            predictions = self.y_mean_ + jnp.dot(cooked, self.beta_)
305            return np.array(predictions)
306        else:
307            return self.y_mean_ + mo.safe_sparse_dot(
308                a=cooked,
309                b=self.beta_,
310                backend=self.backend,
311            )
312
313    def partial_fit(self, X, y, learning_rate=0.01, decay=0.001, **kwargs):
314        """Incrementally fit the Ridge model using vectorized SGD updates (zero-loop with JAX).
315
316        Uses vectorized update rule for all outputs simultaneously.
317
318        Args:
319            X: {array-like}, shape = [n_samples, n_features]
320                Training vectors for this batch
321
322            y: array-like, shape = [n_samples] or [n_samples, n_outputs]
323                Target values for this batch
324
325            learning_rate: float, default=0.01
326                Initial learning rate for SGD updates
327
328            decay: float, default=0.001
329                Learning rate decay parameter
330
331            **kwargs: additional parameters to be passed to self.cook_training_set
332
333        Returns:
334            self: object
335        """
336
337        # Input validation
338        X = np.asarray(X)
339        y = np.atleast_2d(y)
340        if y.shape[0] == 1 and y.shape[1] > 1:
341            y = y.T
342
343        if X.shape[0] != y.shape[0]:
344            raise ValueError("X and y must have the same number of samples")
345
346        # Handle first call
347        if not self._is_fitted:
348            self.initial_learning_rate = learning_rate
349            self.decay = decay
350            self._step_count = 0
351            self._is_fitted = True
352
353        # Process the batch
354        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
355
356        # Get dimensions
357        n_samples, n_features_total = scaled_Z.shape
358        n_original_features = X.shape[1]
359        n_outputs = centered_y.shape[1] if centered_y.ndim > 1 else 1
360
361        # Determine feature dimensions for regularization
362        if self.n_clusters > 0:
363            if self.cluster_encode:
364                n_direct_features = n_original_features + self.n_clusters
365            else:
366                n_direct_features = n_original_features + 1
367        else:
368            n_direct_features = n_original_features
369
370        # Initialize beta_ if first time
371        if not hasattr(self, "beta_") or self.beta_ is None:
372            self.beta_ = np.zeros((n_features_total, n_outputs))
373
374        # Create regularization mask
375        reg_mask = np.concatenate(
376            [
377                np.full(n_direct_features, self.lambda1),
378                np.full(n_features_total - n_direct_features, self.lambda2),
379            ]
380        )[
381            :, np.newaxis
382        ]  # Shape: [n_features_total, 1]
383
384        if self.use_jax:
385            # JAX vectorized implementation (fully zero-loop)
386            scaled_Z = jnp.array(scaled_Z)
387            centered_y = jnp.array(centered_y)
388            self.beta_ = jnp.array(self.beta_)
389            reg_mask = jnp.array(reg_mask)
390
391            # Vectorized over all samples using scan
392            def update_step(beta, inputs):
393                step, x_i, y_i = inputs
394
395                # Learning rate with decay
396                lr = self.initial_learning_rate / (1 + self.decay * step)
397
398                # Prediction: x_i @ beta -> [n_outputs]
399                prediction = jnp.dot(x_i, beta)
400
401                # Error: y_i - prediction -> [n_outputs]
402                error = y_i - prediction
403
404                # Gradient update (vectorized): lr * outer(x_i, error)
405                gradient_update = lr * jnp.outer(x_i, error)
406
407                # Regularization: lr * (reg_mask * beta)
408                reg_update = lr * (reg_mask * beta)
409
410                # Update: beta = beta + gradient - regularization
411                beta_new = beta + gradient_update - reg_update
412
413                return beta_new, None
414
415            # Create step indices
416            steps = jnp.arange(
417                self._step_count + 1, self._step_count + n_samples + 1
418            )
419
420            # Run scan (zero-loop)
421            self.beta_, _ = jax.lax.scan(
422                update_step, self.beta_, (steps, scaled_Z, centered_y)
423            )
424
425            self.beta_ = np.array(self.beta_)
426            self._step_count += n_samples
427        else:
428            # NumPy vectorized implementation (single loop over samples)
429            for i in range(n_samples):
430                self._step_count += 1
431
432                # Current learning rate with decay
433                current_lr = self.initial_learning_rate / (
434                    1 + self.decay * self._step_count
435                )
436
437                # Current sample and target
438                x_i = scaled_Z[i, :]  # [n_features_total]
439                y_i = centered_y[i, :]  # [n_outputs]
440
441                # Prediction: x_i @ beta -> [n_outputs]
442                prediction = x_i @ self.beta_
443
444                # Error: y_i - prediction -> [n_outputs]
445                error = y_i - prediction
446
447                # Vectorized gradient update: outer product
448                # Shape: [n_features_total, n_outputs]
449                gradient_update = current_lr * np.outer(x_i, error)
450
451                # Vectorized regularization update
452                reg_update = current_lr * (reg_mask * self.beta_)
453
454                # Combined update
455                self.beta_ += gradient_update - reg_update
456
457        self.coef_ = self.beta_  # sklearn compatibility
458
459        return self

Ridge regression with 2 regularization parameters for multiple outputs (zero-loop, JAX-optimized)

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, the cluster labels are used as-is, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    'cpu' or 'gpu' or 'tpu'

Attributes:

beta_: {array-like}, shape = [n_features, n_outputs]
    regression coefficients

coef_: {array-like}
    alias for `beta_`, regression coefficients

y_mean_: array-like, shape = [n_outputs]
    average response for each output
def fit(self, X, y, **kwargs):
139    def fit(self, X, y, **kwargs):
140        """Fit Ridge model to training data (X, y) with multiple outputs.
141
142        Args:
143
144            X: {array-like}, shape = [n_samples, n_features]
145                Training vectors, where n_samples is the number
146                of samples and n_features is the number of features.
147
148            y: array-like, shape = [n_samples] or [n_samples, n_outputs]
149                Target values. Can be 1D for single output or 2D for multiple outputs.
150
151            **kwargs: additional parameters to be passed to
152                    self.cook_training_set or self.obj.fit
153
154        Returns:
155
156            self: object
157
158        """
159
160        sys_platform = platform.system()
161
162        # Ensure y is 2D
163        y = np.atleast_2d(y)
164        if y.shape[0] == 1 and y.shape[1] > 1:
165            y = y.T
166
167        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
168
169        n_X, p_X = X.shape
170        n_Z, p_Z = scaled_Z.shape
171        n_outputs = centered_y.shape[1] if centered_y.ndim > 1 else 1
172
173        if self.n_clusters > 0:
174            if self.encode_clusters == True:
175                n_features = p_X + self.n_clusters
176            else:
177                n_features = p_X + 1
178        else:
179            n_features = p_X
180
181        X_ = scaled_Z[:, 0:n_features]
182        Phi_X_ = scaled_Z[:, n_features:p_Z]
183
184        # Use JAX if available and requested
185        if self.use_jax:
186            X_ = jnp.array(X_)
187            Phi_X_ = jnp.array(Phi_X_)
188            centered_y = jnp.array(centered_y)
189
190            # Compute all matrix operations with JAX
191            B = jnp.dot(X_.T, X_) + self.lambda1 * jnp.eye(n_features)
192            C = jnp.dot(Phi_X_.T, X_)
193            D = jnp.dot(Phi_X_.T, Phi_X_) + self.lambda2 * jnp.eye(
194                Phi_X_.shape[1]
195            )
196
197            B_inv = jpinv(B)
198            W = jnp.dot(C, B_inv)
199            S_mat = D - jnp.dot(W, C.T)
200            S_inv = jpinv(S_mat)
201            Y = jnp.dot(S_inv, W)
202
203            # Build inverse matrix
204            inv_upper = jnp.hstack([B_inv + jnp.dot(W.T, Y), -Y.T])
205            inv_lower = jnp.hstack([-Y, S_inv])
206            inv = jnp.vstack([inv_upper, inv_lower])
207
208            # Compute beta for all outputs at once (vectorized)
209            Z_T_y = jnp.dot(scaled_Z.T, centered_y)
210            self.beta_ = jnp.dot(inv, Z_T_y)
211
212            # Convert back to numpy
213            self.beta_ = np.array(self.beta_)
214        else:
215            # NumPy version
216            B = mo.crossprod(
217                x=X_, backend=self.backend
218            ) + self.lambda1 * np.diag(np.repeat(1, n_features))
219            C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
220            D = mo.crossprod(
221                x=Phi_X_, backend=self.backend
222            ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))
223
224            if sys_platform in ("Linux", "Darwin"):
225                B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
226            else:
227                B_inv = pinv(B)
228
229            W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
230            S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)
231
232            if sys_platform in ("Linux", "Darwin"):
233                S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
234            else:
235                S_inv = pinv(S_mat)
236
237            Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
238            inv = mo.rbind(
239                mo.cbind(
240                    x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
241                    y=-np.transpose(Y),
242                    backend=self.backend,
243                ),
244                mo.cbind(x=-Y, y=S_inv, backend=self.backend),
245                backend=self.backend,
246            )
247
248            # Vectorized multi-output computation (no loop)
249            Z_T_y = mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend)
250            self.beta_ = mo.safe_sparse_dot(
251                a=inv, b=Z_T_y, backend=self.backend
252            )
253
254        self.coef_ = self.beta_  # sklearn compatibility
255
256        return self

Fit Ridge model to training data (X, y) with multiple outputs.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples] or [n_samples, n_outputs]
    Target values. Can be 1D for single output or 2D for multiple outputs.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
258    def predict(self, X, **kwargs):
259        """Predict test data X for all outputs.
260
261        Args:
262
263            X: {array-like}, shape = [n_samples, n_features]
264                Training vectors, where n_samples is the number
265                of samples and n_features is the number of features.
266
267            **kwargs: additional parameters to be passed to
268                    self.cook_test_set
269
270        Returns:
271
272            model predictions: {array-like}, shape = [n_samples, n_outputs]
273
274        """
275
276        if len(X.shape) == 1:
277            n_features = X.shape[0]
278            new_X = mo.rbind(
279                x=X.reshape(1, n_features),
280                y=np.ones(n_features).reshape(1, n_features),
281                backend=self.backend,
282            )
283
284            cooked = self.cook_test_set(new_X, **kwargs)
285
286            if self.use_jax:
287                cooked = jnp.array(cooked)
288                predictions = self.y_mean_ + jnp.dot(cooked, self.beta_)
289                return np.array(predictions[0])
290            else:
291                return (
292                    self.y_mean_
293                    + mo.safe_sparse_dot(
294                        a=cooked,
295                        b=self.beta_,
296                        backend=self.backend,
297                    )
298                )[0]
299
300        cooked = self.cook_test_set(X, **kwargs)
301
302        if self.use_jax:
303            cooked = jnp.array(cooked)
304            predictions = self.y_mean_ + jnp.dot(cooked, self.beta_)
305            return np.array(predictions)
306        else:
307            return self.y_mean_ + mo.safe_sparse_dot(
308                a=cooked,
309                b=self.beta_,
310                backend=self.backend,
311            )

Predict test data X for all outputs.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}, shape = [n_samples, n_outputs]
class Ridge2Classifier(nnetsauce.ridge2.ridge2.Ridge2, sklearn.base.ClassifierMixin):
 18class Ridge2Classifier(Ridge2, ClassifierMixin):
 19    """Multinomial logit classification with 2 regularization parameters
 20
 21    Parameters:
 22
 23        n_hidden_features: int
 24            number of nodes in the hidden layer
 25
 26        activation_name: str
 27            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
 28
 29        a: float
 30            hyperparameter for 'prelu' or 'elu' activation function
 31
 32        nodes_sim: str
 33            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
 34            'uniform'
 35
 36        bias: boolean
 37            indicates if the hidden layer contains a bias term (True) or not
 38            (False)
 39
 40        dropout: float
 41            regularization parameter; (random) percentage of nodes dropped out
 42            of the training
 43
 44        direct_link: boolean
 45            indicates if the original predictors are included (True) in model's
 46            fitting or not (False)
 47
 48        n_clusters: int
 49            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
 50                no clustering)
 51
 52        cluster_encode: bool
 53            defines how the variable containing clusters is treated (default is one-hot)
 54            if `False`, then labels are used, without one-hot encoding
 55
 56        type_clust: str
 57            type of clustering method: currently k-means ('kmeans') or Gaussian
 58            Mixture Model ('gmm')
 59
 60        type_scaling: a tuple of 3 strings
 61            scaling methods for inputs, hidden layer, and clustering respectively
 62            (and when relevant).
 63            Currently available: standardization ('std') or MinMax scaling ('minmax')
 64
 65        lambda1: float
 66            regularization parameter on direct link
 67
 68        lambda2: float
 69            regularization parameter on hidden layer
 70
 71        solver: str
 72            optimization function "L-BFGS-B",  "Newton-CG",
 73            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
 74            "trust-ncg-lstsq" (see scipy.optimize.minimize)
 75            When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
 76            the initial value for the optimization is set to the least squares solution
 77
 78        seed: int
 79            reproducibility seed for nodes_sim=='uniform'
 80
 81        backend: str
 82            "cpu" or "gpu" or "tpu"
 83
 84    Attributes:
 85
 86        beta_: {array-like}
 87            regression coefficients
 88
 89        classes_: {array-like}
 90            unique classes in the target variable
 91
 92        minloglik_: float
 93            minimum value of the negative log-likelihood
 94
 95    Examples:
 96
 97    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py)
 98
 99    ```python
100    import nnetsauce as ns
101    import numpy as np
102    from sklearn.datasets import load_breast_cancer
103    from sklearn.model_selection import train_test_split
104    from time import time
105
106
107    breast_cancer = load_breast_cancer()
108    X = breast_cancer.data
109    y = breast_cancer.target
110
111    # split data into training test and test set
112    np.random.seed(123)
113    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
114
115    # create the model with nnetsauce
116    fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
117                                lambda2 = 3.17392781e+02,
118                                n_hidden_features=95,
119                                n_clusters=2,
120                                dropout = 3.62817383e-01,
121                                type_clust = "gmm")
122
123    # fit the model on training set
124    start = time()
125    fit_obj.fit(X_train, y_train)
126    print(f"Elapsed {time() - start}")
127
128    # get the accuracy on test set
129    start = time()
130    print(fit_obj.score(X_test, y_test))
131    print(f"Elapsed {time() - start}")
132
133    # get area under the curve on test set (auc)
134    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
135    ```
136
137
138    """
139
140    _estimator_type = "classifier"
141
142    # construct the object -----
143
144    def __init__(
145        self,
146        n_hidden_features=5,
147        activation_name="relu",
148        a=0.01,
149        nodes_sim="sobol",
150        bias=True,
151        dropout=0,
152        direct_link=True,
153        n_clusters=2,
154        cluster_encode=True,
155        type_clust="kmeans",
156        type_scaling=("std", "std", "std"),
157        lambda1=0.1,
158        lambda2=0.1,
159        solver="L-BFGS-B",
160        seed=123,
161        backend="cpu",
162    ):
163        super().__init__(
164            n_hidden_features=n_hidden_features,
165            activation_name=activation_name,
166            a=a,
167            nodes_sim=nodes_sim,
168            bias=bias,
169            dropout=dropout,
170            direct_link=direct_link,
171            n_clusters=n_clusters,
172            cluster_encode=cluster_encode,
173            type_clust=type_clust,
174            type_scaling=type_scaling,
175            lambda1=lambda1,
176            lambda2=lambda2,
177            seed=seed,
178            backend=backend,
179        )
180
181        self.type_fit = "classification"
182        self.solver = solver
183        self.beta_ = None
184        self.classes_ = None
185        self.minloglik_ = None
186        self.coef_ = None
187
188    def loglik(self, X, Y, **kwargs):
189        """Log-likelihood for training data (X, Y).
190
191        Args:
192
193            X: {array-like}, shape = [n_samples, n_features]
194                Training vectors, where n_samples is the number
195                of samples and n_features is the number of features.
196
197            Y: array-like, shape = [n_samples]
198                One-hot encode target values.
199
200            **kwargs: additional parameters to be passed to
201                    self.cook_training_set or self.obj.fit
202
203        Returns:
204
205        """
206
207        def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs):
208            # nobs, n_classes
209            n, K = Y.shape
210
211            # total number of covariates
212            p = X.shape[1]
213
214            # initial number of covariates
215            init_p = p - self.n_hidden_features
216
217            max_double = 709.0
218            XB[XB > max_double] = max_double
219            exp_XB = np.exp(XB)
220            probs = exp_XB / exp_XB.sum(axis=1)[:, None]
221
222            # gradient -----
223            # (Y - p) -> (n, K)
224            # X -> (n, p)
225            # (K, n) %*% (n, p) -> (K, p)
226            if hessian is False:
227                grad = (
228                    -mo.safe_sparse_dot(
229                        a=(Y - probs).T, b=X, backend=self.backend
230                    )
231                    / n
232                )
233                grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None]
234                grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None]
235
236                return grad.flatten()
237
238            # hessian -----
239            if hessian is True:
240                Kp = K * p
241                hess = np.zeros((Kp, Kp), float)
242                for k1 in range(K):
243                    x_index = range(k1 * p, (k1 + 1) * p)
244                    for k2 in range(k1, K):
245                        y_index = range(k2 * p, (k2 + 1) * p)
246                        H_sub = (
247                            -mo.safe_sparse_dot(
248                                a=X.T,
249                                b=(probs[:, k1] * probs[:, k2])[:, None] * X,
250                                backend=self.backend,
251                            )
252                            / n
253                        )  # do not store
254                        hess[np.ix_(x_index, y_index)] = hess[
255                            np.ix_(y_index, x_index)
256                        ] = H_sub
257
258                return hess + (self.lambda1 + self.lambda2) * np.identity(Kp)
259
260        # total number of covariates
261        p = X.shape[1]
262
263        # initial number of covariates
264        init_p = p - self.n_hidden_features
265
266        # log-likelihood (1st return)
267        def loglik_func(x):
268            # (p, K)
269            B = x.reshape(Y.shape[1], p).T
270
271            # (n, K)
272            XB = mo.safe_sparse_dot(X, B, backend=self.backend)
273
274            res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean()
275
276            res += (
277                0.5
278                * self.lambda1
279                * mo.squared_norm(B[0:init_p, :], backend=self.backend)
280            )
281            res += (
282                0.5
283                * self.lambda2
284                * mo.squared_norm(B[init_p:p, :], backend=self.backend)
285            )
286
287            return res
288
289        # gradient of log-likelihood
290        def grad_func(x):
291            # (p, K)
292            B = x.reshape(Y.shape[1], p).T
293
294            return loglik_grad_hess(
295                Y=Y,
296                X=X,
297                B=B,
298                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
299                hessian=False,
300                **kwargs
301            )
302
303        # hessian of log-likelihood
304        def hessian_func(x):
305            # (p, K)
306            B = x.reshape(Y.shape[1], p).T
307
308            return loglik_grad_hess(
309                Y=Y,
310                X=X,
311                B=B,
312                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
313                hessian=True,
314                **kwargs
315            )
316
317        return loglik_func, grad_func, hessian_func
318
319    # newton-cg
320    # L-BFGS-B
321    def fit(self, X, y, **kwargs):
322        """Fit Ridge model to training data (X, y).
323
324        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
325        for K classes and p covariates.
326
327        Args:
328
329            X: {array-like}, shape = [n_samples, n_features]
330                Training vectors, where n_samples is the number
331                of samples and n_features is the number of features.
332
333            y: array-like, shape = [n_samples]
334                Target values.
335
336            **kwargs: additional parameters to be passed to
337                    self.cook_training_set or self.obj.fit
338
339        Returns:
340
341            self: object
342
343        """
344
345        assert mx.is_factor(y), "y must contain only integers"
346
347        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
348
349        self.n_classes = len(np.unique(y))
350        self.classes_ = np.unique(y)  # for compatibility with sklearn
351        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
352
353        Y = mo.one_hot_encode2(output_y, self.n_classes)
354
355        # optimize for beta, minimize self.loglik (maximize loglik) -----
356        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)
357
358        if self.solver == "L-BFGS-B":
359            opt = minimize(
360                fun=loglik_func,
361                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
362                jac=grad_func,
363                method=self.solver,
364            )
365            self.beta_ = opt.x
366            self.minloglik_ = opt.fun
367
368        if self.solver in ("Newton-CG", "trust-ncg"):
369            opt = minimize(
370                fun=loglik_func,
371                x0=np.zeros(scaled_Z.shape[1] * self.n_classes),
372                jac=grad_func,
373                hess=hessian_func,
374                method=self.solver,
375            )
376            self.beta_ = opt.x
377            self.minloglik_ = opt.fun
378
379        if self.solver == "L-BFGS-B-lstsq":
380            opt = minimize(
381                fun=loglik_func,
382                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
383                    order="F"
384                ),
385                jac=grad_func,
386                method="L-BFGS-B",
387            )
388            self.beta_ = opt.x
389            self.minloglik_ = opt.fun
390
391        if self.solver in "Newton-CG-lstsq":
392            opt = minimize(
393                fun=loglik_func,
394                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
395                    order="F"
396                ),
397                jac=grad_func,
398                hess=hessian_func,
399                method="Newton-CG",
400            )
401            self.beta_ = opt.x
402            self.minloglik_ = opt.fun
403
404        if self.solver in "trust-ncg-lstsq":
405            opt = minimize(
406                fun=loglik_func,
407                x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(
408                    order="F"
409                ),
410                jac=grad_func,
411                hess=hessian_func,
412                method="trust-ncg",
413            )
414            self.beta_ = opt.x
415            self.minloglik_ = opt.fun
416
417        self.coef_ = self.beta_
418
419        self.classes_ = np.unique(y)
420
421        return self
422
423    def predict(self, X, **kwargs):
424        """Predict test data X.
425
426        Args:
427
428            X: {array-like}, shape = [n_samples, n_features]
429                Training vectors, where n_samples is the number
430                of samples and n_features is the number of features.
431
432            **kwargs: additional parameters to be passed to
433                    self.cook_test_set
434
435        Returns:
436
437            model predictions: {array-like}
438        """
439
440        return np.argmax(self.predict_proba(X, **kwargs), axis=1)
441
442    def predict_proba(self, X, **kwargs):
443        """Predict probabilities for test data X.
444
445        Args:
446
447            X: {array-like}, shape = [n_samples, n_features]
448                Training vectors, where n_samples is the number
449                of samples and n_features is the number of features.
450
451            **kwargs: additional parameters to be passed to
452                    self.cook_test_set
453
454        Returns:
455
456            probability estimates for test data: {array-like}
457
458        """
459        if len(X.shape) == 1:
460            n_features = X.shape[0]
461            new_X = mo.rbind(
462                X.reshape(1, n_features),
463                np.ones(n_features).reshape(1, n_features),
464            )
465
466            Z = self.cook_test_set(new_X, **kwargs)
467
468        else:
469            Z = self.cook_test_set(X, **kwargs)
470
471        ZB = mo.safe_sparse_dot(
472            a=Z,
473            b=self.beta_.reshape(
474                self.n_classes,
475                X.shape[1] + self.n_hidden_features + self.n_clusters,
476            ).T,
477            backend=self.backend,
478        )
479
480        exp_ZB = np.exp(ZB)
481
482        return exp_ZB / exp_ZB.sum(axis=1)[:, None]
483
484    @property
485    def _estimator_type(self):
486        return "classifier"

Multinomial logit classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

direct_link: boolean
    indicates if the original predictors are included (True) in model's
    fitting or not (False)

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot);
    if `False`, the cluster labels are used as-is, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

solver: str
    optimization function "L-BFGS-B",  "Newton-CG",
    "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
    "trust-ncg-lstsq" (see scipy.optimize.minimize)
    When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
    the initial value for the optimization is set to the least squares solution

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

classes_: {array-like}
    unique classes in the target variable

minloglik_: float
    minimum value of the negative log-likelihood

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time


breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                            lambda2 = 3.17392781e+02,
                            n_hidden_features=95,
                            n_clusters=2,
                            dropout = 3.62817383e-01,
                            type_clust = "gmm")

# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")

# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
    for K classes and p covariates.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    Raises:

        ValueError: if `self.solver` is not one of "L-BFGS-B", "Newton-CG",
            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
            "trust-ncg-lstsq"

    """

    # Resolve the solver name with exact matching. The previous substring
    # tests (`self.solver in "Newton-CG-lstsq"`) also matched "Newton-CG"
    # and "trust-ncg", so those solvers were silently re-run from the
    # least squares starting point and their first result was overwritten.
    base_methods = ("L-BFGS-B", "Newton-CG", "trust-ncg")
    lstsq_suffix = "-lstsq"
    solver = self.solver
    use_lstsq_start = isinstance(solver, str) and solver.endswith(lstsq_suffix)
    method = solver[: -len(lstsq_suffix)] if use_lstsq_start else solver
    if method not in base_methods:
        raise ValueError(
            f"Unknown solver {solver!r}; expected one of "
            "'L-BFGS-B', 'Newton-CG', 'trust-ncg', 'L-BFGS-B-lstsq', "
            "'Newton-CG-lstsq', 'trust-ncg-lstsq'"
        )

    assert mx.is_factor(y), "y must contain only integers"

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
    self.n_classes = self.n_classes_

    Y = mo.one_hot_encode2(output_y, self.n_classes)

    # optimize for beta, minimize self.loglik (maximize loglik) -----
    loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)

    if use_lstsq_start:
        # start from the least squares solution, flattened column by column
        x0 = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F")
    else:
        x0 = np.zeros(scaled_Z.shape[1] * self.n_classes)

    optimizer_args = dict(fun=loglik_func, x0=x0, jac=grad_func, method=method)
    if method in ("Newton-CG", "trust-ncg"):
        # only the second-order methods are given the Hessian
        optimizer_args["hess"] = hessian_func

    opt = minimize(**optimizer_args)

    self.beta_ = opt.x
    self.minloglik_ = opt.fun
    self.coef_ = self.beta_

    return self

Fit Ridge model to training data (X, y).

for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}
            for each row, the column index of the largest value
            returned by `self.predict_proba`
    """
    class_probabilities = self.predict_proba(X, **kwargs)
    winning_columns = np.argmax(class_probabilities, axis=1)
    return winning_columns

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single sample.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}
            one row per sample and one column per class; a 1-D `X`
            yields a single row

    """
    single_sample = len(X.shape) == 1

    if single_sample:
        n_features = X.shape[0]
        # The sample is stacked on top of a row of ones before being
        # preprocessed (presumably cook_test_set needs more than one
        # row -- TODO confirm); that padding row is dropped again below.
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )
        Z = self.cook_test_set(new_X, **kwargs)
    else:
        Z = self.cook_test_set(X, **kwargs)

    # beta_ is the flat optimiser output holding one block of coefficients
    # per class. Letting reshape infer the block length avoids X.shape[1],
    # which raised IndexError for 1-D input and miscounted the columns
    # whenever the cluster variable is not one-hot encoded.
    coefficients = self.beta_.reshape(self.n_classes, -1).T

    ZB = np.asarray(
        mo.safe_sparse_dot(a=Z, b=coefficients, backend=self.backend)
    )

    # softmax, shifted by the row maximum so np.exp cannot overflow
    exp_ZB = np.exp(ZB - np.max(ZB, axis=1, keepdims=True))
    probabilities = exp_ZB / exp_ZB.sum(axis=1)[:, None]

    return probabilities[:1] if single_sample else probabilities

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin):
    """Multitask Ridge classification with 2 regularization parameters

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: {array-like}
            regression coefficients

        coef_: {array-like}
            alias for `beta_`, regression coefficients

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                    dropout=4.31054687e-01,
                                    n_clusters=int(1.71484375e+00),
                                    lambda1=1.24023438e+01, lambda2=7.30263672e+03)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.coef_ = None

    # A plain `_estimator_type = "classifier"` class attribute used to sit
    # next to this property; the property was defined later in the class
    # body and therefore always replaced it, so only the property is kept.
    @property
    def _estimator_type(self):
        return "classifier"

    def _pseudo_inverse(self, matrix):
        """Pseudo-invert `matrix` with `pinv`, or `jpinv` off-CPU on Linux/macOS."""
        off_cpu = self.backend != "cpu"
        if off_cpu and platform.system() in ("Linux", "Darwin"):
            return jpinv(matrix)
        return pinv(matrix)

    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y).

        The coefficients are obtained in closed form: the regularized
        cross-product matrix of the preprocessed features is inverted
        block-wise (first block of columns penalized by `lambda1`, remaining
        columns by `lambda2`) and applied to the one-hot encoded response.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        classes = np.unique(y)
        self.classes_ = classes  # for compatibility with sklearn
        self.n_classes_ = len(classes)  # for compatibility with sklearn

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        _, p_X = X.shape
        _, p_Z = scaled_Z.shape

        self.n_classes = len(classes)

        # multitask response
        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # NOTE(review): this method reads `self.encode_clusters` whereas
        # partial_fit reads `self.cluster_encode`; the base class is not
        # visible here, so confirm both attributes exist and agree.
        if self.n_clusters > 0:
            if self.encode_clusters == True:
                n_features = p_X + self.n_clusters
            else:
                n_features = p_X + 1
        else:
            n_features = p_X

        # columns penalized by lambda1, then columns penalized by lambda2
        X_ = scaled_Z[:, 0:n_features]
        Phi_X_ = scaled_Z[:, n_features:p_Z]

        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.eye(
            X_.shape[1], dtype=int
        )
        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
        D = mo.crossprod(
            x=Phi_X_, backend=self.backend
        ) + self.lambda2 * np.eye(Phi_X_.shape[1], dtype=int)

        B_inv = self._pseudo_inverse(B)

        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
        # Schur complement of B in the block matrix [[B, C'], [C, D]]
        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

        S_inv = self._pseudo_inverse(S_mat)

        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
        # block-wise inverse assembled from B_inv, W and S_inv
        inv = mo.rbind(
            mo.cbind(
                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
                y=-np.transpose(Y2),
                backend=self.backend,
            ),
            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
            backend=self.backend,
        )

        self.beta_ = mo.safe_sparse_dot(
            a=inv,
            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
            backend=self.backend,
        )
        self.coef_ = self.beta_  # sklearn compatibility
        self.classes_ = classes
        self._is_fitted = True
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}

        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single sample.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
                one row per sample and one column per class; a 1-D `X`
                yields a single row

        """

        single_sample = len(X.shape) == 1

        if single_sample:
            n_features = X.shape[0]
            # The sample is stacked on top of a row of ones before being
            # preprocessed (presumably cook_test_set needs more than one
            # row -- TODO confirm); that padding row is dropped again below
            # instead of being returned as if it were a prediction.
            new_X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

            Z = self.cook_test_set(new_X, **kwargs)

        else:
            Z = self.cook_test_set(X, **kwargs)

        ZB = np.asarray(
            mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
        )

        # softmax, shifted by the row maximum so np.exp cannot overflow
        exp_ZB = np.exp(ZB - np.max(ZB, axis=1, keepdims=True))
        probabilities = exp_ZB / exp_ZB.sum(axis=1)[:, None]

        return probabilities[:1] if single_sample else probabilities

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy); one of "accuracy",
                "f1", "precision", "recall", "roc_auc", "log_loss",
                "balanced_accuracy", "average_precision",
                "neg_brier_score", "neg_log_loss"

        Returns:

            score: float

        Raises:

            ValueError: if `scoring` is not one of the supported names
                (this used to return None silently)
        """

        if scoring is None:
            scoring = "accuracy"

        # metrics evaluated on the predicted labels
        label_metrics = {
            "accuracy": skm2.accuracy_score,
            "f1": skm2.f1_score,
            "precision": skm2.precision_score,
            "recall": skm2.recall_score,
            "roc_auc": skm2.roc_auc_score,
            "balanced_accuracy": skm2.balanced_accuracy_score,
            "average_precision": skm2.average_precision_score,
        }
        # metrics evaluated on the predicted probabilities, with a flag
        # telling whether the loss is negated ("neg_*" scorers)
        proba_metrics = {
            "log_loss": (skm2.log_loss, False),
            "neg_brier_score": (skm2.brier_score_loss, True),
            "neg_log_loss": (skm2.log_loss, True),
        }

        if scoring in label_metrics:
            return label_metrics[scoring](y, self.predict(X))

        if scoring in proba_metrics:
            metric, negate = proba_metrics[scoring]
            value = metric(y, self.predict_proba(X))
            return -value if negate else value

        raise ValueError(
            f"Unknown scoring method {scoring!r}; expected one of "
            f"{sorted(list(label_metrics) + list(proba_metrics))}"
        )

    def partial_fit(
        self, X, y, classes=None, learning_rate=0.01, decay=0.001, **kwargs
    ):
        """Incrementally fit the Ridge model using SGD-style updates.

        Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n
        for online learning with individual samples.

        Args:
            X: {array-like}, shape = [n_samples, n_features]
                Training vectors for this batch

            y: array-like, shape = [n_samples]
                Target values for this batch

            classes: array-like, shape = [n_classes], optional
                List of all possible target classes. Used on the first call
                to partial_fit if the model is not already fitted; when
                omitted, the classes are taken from `y`.

            learning_rate: float, default=0.01
                Initial learning rate for SGD updates. Only read when the
                SGD state is initialised (first call, or first call after
                `fit`); later calls keep the stored value.

            decay: float, default=0.001
                Learning rate decay parameter. Only read when the SGD state
                is initialised, like `learning_rate`.

            **kwargs: additional parameters to be passed to self.cook_training_set

        Returns:
            self: object

        Raises:
            ValueError: if X and y have different numbers of samples, or if
                `y` contains classes that were not known on the first call
        """
        # Input validation
        X = np.asarray(X)
        y = np.asarray(y)

        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples")

        assert mx.is_factor(y), "y must contain only integers"

        # Handle classes on first call
        first_call = not getattr(self, "_is_fitted", False)

        if first_call:
            if classes is not None:
                self.classes_ = np.array(classes)
            else:
                self.classes_ = np.unique(y)

            self.n_classes_ = len(self.classes_)
            self.n_classes = len(self.classes_)
            self._is_fitted = True

        else:
            # Check for new classes
            new_classes = np.setdiff1d(y, self.classes_)
            if len(new_classes) > 0:
                raise ValueError(
                    f"New classes {new_classes} encountered. "
                    "partial_fit cannot handle new classes after first call."
                )

        # Initialize learning parameters. `fit` marks the model as fitted
        # without creating this state, so it is also created here when
        # missing; otherwise partial_fit after fit failed on _step_count.
        if first_call or not hasattr(self, "_step_count"):
            self.initial_learning_rate = learning_rate
            self.decay = decay
            self._step_count = 0

        # Process the batch
        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # Get dimensions
        n_features_total = scaled_Z.shape[1]
        n_original_features = X.shape[1]

        # Create one-hot encoded targets
        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # Determine feature dimensions for regularization
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_direct_features = n_original_features + self.n_clusters
            else:
                n_direct_features = n_original_features + 1
        else:
            n_direct_features = n_original_features

        # Initialize beta_ if first time
        if not hasattr(self, "beta_") or self.beta_ is None:
            self.beta_ = np.zeros((n_features_total, self.n_classes))

        # Per-row penalty: lambda1 on the first n_direct_features rows of
        # beta_, lambda2 on the remaining ones (built once, outside the loop)
        penalties = np.empty(n_features_total)
        penalties[:n_direct_features] = self.lambda1
        penalties[n_direct_features:] = self.lambda2

        # Process each sample with SGD
        for x_i, y_i in zip(scaled_Z, Y):
            self._step_count += 1

            # Current learning rate with decay
            current_lr = self.initial_learning_rate / (
                1 + self.decay * self._step_count
            )

            # Error: y_i - x_i^T * beta
            error = y_i - x_i @ self.beta_

            # Gradient update: current_lr * x_i * error
            gradient_update = current_lr * np.outer(x_i, error)

            # Regularization shrinkage: current_lr * lambda * beta
            reg_update = (current_lr * penalties)[:, None] * self.beta_

            # Combined update: beta = beta + gradient_update - reg_update
            self.beta_ += gradient_update - reg_update

        self.coef_ = self.beta_  # sklearn compatibility

        return self

Multitask Ridge classification with 2 regularization parameters

Parameters:

n_hidden_features: int
    number of nodes in the hidden layer

activation_name: str
    activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

a: float
    hyperparameter for 'prelu' or 'elu' activation function

nodes_sim: str
    type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
    'uniform'

bias: boolean
    indicates if the hidden layer contains a bias term (True) or not
    (False)

dropout: float
    regularization parameter; (random) percentage of nodes dropped out
    of the training

n_clusters: int
    number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
        no clustering)

cluster_encode: bool
    defines how the variable containing clusters is treated (default is one-hot)
    if `False`, then labels are used, without one-hot encoding

type_clust: str
    type of clustering method: currently k-means ('kmeans') or Gaussian
    Mixture Model ('gmm')

type_scaling: a tuple of 3 strings
    scaling methods for inputs, hidden layer, and clustering respectively
    (and when relevant).
    Currently available: standardization ('std') or MinMax scaling ('minmax')

lambda1: float
    regularization parameter on direct link

lambda2: float
    regularization parameter on hidden layer

seed: int
    reproducibility seed for nodes_sim=='uniform'

backend: str
    "cpu" or "gpu" or "tpu"

Attributes:

beta_: {array-like}
    regression coefficients

coef_: {array-like}
    alias for `beta_`, regression coefficients

Examples:

See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                dropout=4.31054687e-01,
                                n_clusters=int(1.71484375e+00),
                                lambda1=1.24023438e+01, lambda2=7.30263672e+03)

start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")

print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    The coefficients are obtained in closed form: the regularized
    cross-product matrix of the preprocessed features is inverted
    block-wise and applied to the one-hot encoded response.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    jax_capable_platform = platform.system() in ("Linux", "Darwin")

    def pseudo_inverse(matrix):
        # jpinv is only used off-CPU on Linux/macOS, pinv everywhere else
        if jax_capable_platform and self.backend != "cpu":
            return jpinv(matrix)
        return pinv(matrix)

    assert mx.is_factor(y), "y must contain only integers"

    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    _, p_X = X.shape
    _, p_Z = scaled_Z.shape

    self.n_classes = len(np.unique(y))

    # multitask response
    Y = mo.one_hot_encode2(output_y, self.n_classes)

    # number of leading columns penalized by lambda1
    n_features = p_X
    if self.n_clusters > 0:
        n_features += self.n_clusters if self.encode_clusters == True else 1

    X_ = scaled_Z[:, 0:n_features]
    Phi_X_ = scaled_Z[:, n_features:p_Z]

    ridge_direct = self.lambda1 * np.identity(X_.shape[1], dtype=int)
    ridge_hidden = self.lambda2 * np.identity(Phi_X_.shape[1], dtype=int)

    B = mo.crossprod(x=X_, backend=self.backend) + ridge_direct
    C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
    D = mo.crossprod(x=Phi_X_, backend=self.backend) + ridge_hidden

    B_inv = pseudo_inverse(B)

    W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
    # Schur complement of B in the block matrix [[B, C'], [C, D]]
    S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

    S_inv = pseudo_inverse(S_mat)

    Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)

    # block-wise inverse assembled from B_inv, W and S_inv
    upper_blocks = mo.cbind(
        x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
        y=-np.transpose(Y2),
        backend=self.backend,
    )
    lower_blocks = mo.cbind(x=-Y2, y=S_inv, backend=self.backend)
    inv = mo.rbind(upper_blocks, lower_blocks, backend=self.backend)

    cross_ZY = mo.crossprod(x=scaled_Z, y=Y, backend=self.backend)
    self.beta_ = mo.safe_sparse_dot(a=inv, b=cross_ZY, backend=self.backend)
    self.coef_ = self.beta_  # sklearn compatibility
    self.classes_ = np.unique(y)
    self._is_fitted = True
    return self

Fit Ridge model to training data (X, y).

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

**kwargs: additional parameters to be passed to
        self.cook_training_set or self.obj.fit

Returns:

self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}
            for each row, the column index of the largest value
            returned by `self.predict_proba`

    """
    class_probabilities = self.predict_proba(X, **kwargs)
    winning_columns = np.argmax(class_probabilities, axis=1)
    return winning_columns

Predict test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

model predictions: {array-like}
def predict_proba(self, X, **kwargs):
275    def predict_proba(self, X, **kwargs):
276        """Predict probabilities for test data X.
277
278        Args:
279
280            X: {array-like}, shape = [n_samples, n_features]
281                Training vectors, where n_samples is the number
282                of samples and n_features is the number of features.
283
284            **kwargs: additional parameters to be passed to
285                    self.cook_test_set
286
287        Returns:
288
289            probability estimates for test data: {array-like}
290
291        """
292
293        if len(X.shape) == 1:
294            n_features = X.shape[0]
295            new_X = mo.rbind(
296                x=X.reshape(1, n_features),
297                y=np.ones(n_features).reshape(1, n_features),
298                backend=self.backend,
299            )
300
301            Z = self.cook_test_set(new_X, **kwargs)
302
303        else:
304            Z = self.cook_test_set(X, **kwargs)
305
306        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)
307
308        exp_ZB = np.exp(ZB)
309
310        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

Predict probabilities for test data X.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Test vectors, where n_samples is the number
    of samples and n_features is the number of features.

**kwargs: additional parameters to be passed to
        self.cook_test_set

Returns:

probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
312    def score(self, X, y, scoring=None):
313        """Scoring function for classification.
314
315        Args:
316
317            X: {array-like}, shape = [n_samples, n_features]
318                Training vectors, where n_samples is the number
319                of samples and n_features is the number of features.
320
321            y: array-like, shape = [n_samples]
322                Target values.
323
324            scoring: str
325                scoring method (default is accuracy)
326
327        Returns:
328
329            score: float
330        """
331
332        if scoring is None:
333            scoring = "accuracy"
334
335        if scoring == "accuracy":
336            return skm2.accuracy_score(y, self.predict(X))
337
338        if scoring == "f1":
339            return skm2.f1_score(y, self.predict(X))
340
341        if scoring == "precision":
342            return skm2.precision_score(y, self.predict(X))
343
344        if scoring == "recall":
345            return skm2.recall_score(y, self.predict(X))
346
347        if scoring == "roc_auc":
348            return skm2.roc_auc_score(y, self.predict(X))
349
350        if scoring == "log_loss":
351            return skm2.log_loss(y, self.predict_proba(X))
352
353        if scoring == "balanced_accuracy":
354            return skm2.balanced_accuracy_score(y, self.predict(X))
355
356        if scoring == "average_precision":
357            return skm2.average_precision_score(y, self.predict(X))
358
359        if scoring == "neg_brier_score":
360            return -skm2.brier_score_loss(y, self.predict_proba(X))
361
362        if scoring == "neg_log_loss":
363            return -skm2.log_loss(y, self.predict_proba(X))

Scoring function for classification.

Args:

X: {array-like}, shape = [n_samples, n_features]
    Training vectors, where n_samples is the number
    of samples and n_features is the number of features.

y: array-like, shape = [n_samples]
    Target values.

scoring: str
    scoring method (default is accuracy)

Returns:

score: float
class Ridge2Forecaster:
 21class Ridge2Forecaster:
 22    """Vectorized Ridge2 RVFL for multivariate time series forecasting.
 23
 24    Parameters
 25    ----------
 26    lags : int, optional
 27        Number of lags to use for feature engineering, by default 1
 28    nb_hidden : int, optional
 29        Number of hidden units, by default 5
 30    activ : str, optional
 31        Activation function, by default 'relu'
 32    lambda_1 : float, optional
 33        Ridge regularization parameter for input features, by default 0.1
 34    lambda_2 : float, optional
 35        Ridge regularization parameter for hidden units, by default 0.1
 36    nodes_sim : str, optional
 37        Type of quasi-random sequence for weight initialization, by default 'sobol'
 38    seed : int, optional
 39        Random seed for reproducibility, by default 42
 40    """
 41
 42    def __init__(
 43        self,
 44        lags=1,
 45        nb_hidden=5,
 46        activ="relu",
 47        lambda_1=0.1,
 48        lambda_2=0.1,
 49        nodes_sim="sobol",
 50        seed=42,
 51    ):
 52        if not JAX_AVAILABLE:
 53            raise RuntimeError(
 54                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
 55            )
 56
 57        self.lags = lags
 58        self.nb_hidden = nb_hidden
 59        self.lambda_1 = lambda_1
 60        self.lambda_2 = lambda_2
 61        self.nodes_sim = nodes_sim
 62        self.seed = seed
 63        self.coef_ = None
 64
 65        # Activation functions
 66        activations = {
 67            "relu": lambda x: jnp.maximum(0, x),
 68            "sigmoid": lambda x: 1 / (1 + jnp.exp(-x)),
 69            "tanh": jnp.tanh,
 70            "linear": lambda x: x,
 71        }
 72        self.activation = jax.jit(activations[activ])
 73
 74    def _create_lags(self, y):
 75        """Create lagged feature matrix (vectorized)."""
 76        n, p = y.shape
 77        X = jnp.concatenate(
 78            [y[self.lags - i - 1: n - i - 1] for i in range(self.lags)], axis=1
 79        )
 80        Y = y[self.lags:]
 81        return X, Y
 82
 83    def _init_weights(self, n_features):
 84        """Initialize hidden layer weights using quasi-random sequences."""
 85        total_dim = n_features * self.nb_hidden
 86
 87        if self.nodes_sim == "sobol":
 88            sampler = qmc.Sobol(d=total_dim, scramble=False, seed=self.seed)
 89            W = sampler.random(1).reshape(n_features, self.nb_hidden)
 90            W = 2 * W - 1
 91        else:
 92            key = jax.random.PRNGKey(self.seed)
 93            W = jax.random.uniform(
 94                key, (n_features, self.nb_hidden), minval=-1, maxval=1
 95            )
 96
 97        return jnp.array(W)
 98
 99    if JAX_AVAILABLE:
100
101        @partial(jax.jit, static_argnums=(0,))
102        def _compute_hidden(self, X, W):
103            """Compute hidden layer features (vectorized)."""
104            return self.activation(X @ W)
105
106        @partial(jax.jit, static_argnums=(0,))
107        def _solve_ridge2(self, X, H, Y):
108            """Solve ridge regression with dual regularization."""
109            n, p_x = X.shape
110            _, p_h = H.shape
111
112            Y_mean = jnp.mean(Y, axis=0)
113            Y_c = Y - Y_mean
114
115            X_mean = jnp.mean(X, axis=0)
116            X_std = jnp.std(X, axis=0)
117            X_std = jnp.where(X_std == 0, 1.0, X_std)
118            X_s = (X - X_mean) / X_std
119
120            H_mean = jnp.mean(H, axis=0)
121            H_std = jnp.std(H, axis=0)
122            H_std = jnp.where(H_std == 0, 1.0, H_std)
123            H_s = (H - H_mean) / H_std
124
125            XX = X_s.T @ X_s + self.lambda_1 * jnp.eye(p_x)
126            XH = X_s.T @ H_s
127            HH = H_s.T @ H_s + self.lambda_2 * jnp.eye(p_h)
128
129            XX_inv = jnp.linalg.inv(XX)
130            S = HH - XH.T @ XX_inv @ XH
131            S_inv = jnp.linalg.inv(S)
132
133            XY = X_s.T @ Y_c
134            HY = H_s.T @ Y_c
135
136            beta = XX_inv @ (XY - XH @ S_inv @ (HY - XH.T @ XX_inv @ XY))
137            gamma = S_inv @ (HY - XH.T @ beta)
138            self.coef_ = jnp.concatenate([beta, gamma], axis=1)
139
140            return beta, gamma, Y_mean, X_mean, X_std, H_mean, H_std
141
142    def fit(self, y):
143        """Fit the Ridge2 model.
144
145        Parameters
146        ----------
147        y : array-like of shape (n_samples,)
148            Target values.
149        """
150        y = jnp.array(y)
151        if y.ndim == 1:
152            y = y[:, None]
153
154        X, Y = self._create_lags(y)
155        self.n_series = Y.shape[1]
156
157        self.W = self._init_weights(X.shape[1])
158        H = self._compute_hidden(X, self.W)
159
160        (
161            self.beta,
162            self.gamma,
163            self.Y_mean,
164            self.X_mean,
165            self.X_std,
166            self.H_mean,
167            self.H_std,
168        ) = self._solve_ridge2(X, H, Y)
169
170        # Compute residuals for prediction intervals
171        X_s = (X - self.X_mean) / self.X_std
172        H_s = (H - self.H_mean) / self.H_std
173        fitted = X_s @ self.beta + H_s @ self.gamma + self.Y_mean
174        self.residuals = np.array(Y - fitted)
175
176        self.last_obs = y[-self.lags:]
177        return self
178
179    if JAX_AVAILABLE:
180
181        @partial(jax.jit, static_argnums=(0,))
182        def _predict_step(self, x_new):
183            """Single prediction step (JIT-compiled).
184
185            Parameters
186            ----------
187            x_new : array-like of shape (n_features,)
188                New input data.
189
190            Returns
191            -------
192            y_next : float
193                Next-step prediction.
194            """
195            x_s = (x_new - self.X_mean) / self.X_std
196            h = self.activation(x_s @ self.W)
197            h_s = (h - self.H_mean) / self.H_std
198            return x_s @ self.beta + h_s @ self.gamma + self.Y_mean
199
200    def _forecast(self, h=5):
201        """Generate h-step ahead recursive forecasts.
202
203        Parameters
204        ----------
205        h : int, optional
206            Number of steps to forecast, by default 5
207
208        Returns
209        -------
210        forecasts : array-like of shape (h,)
211            Forecasted values.
212        """
213        forecasts = []
214        current = self.last_obs.copy()
215
216        for _ in range(h):
217            x_new = current.flatten()[None, :]
218            y_next = self._predict_step(x_new)[0]
219            forecasts.append(y_next)
220            current = jnp.vstack([current[1:], y_next])
221
222        return jnp.array(forecasts)
223
224    def predict(self, h=5, level=None, method="gaussian", B=100):
225        """Generate prediction intervals with proper uncertainty propagation.
226
227        Parameters
228        ----------
229        h : int, optional
230            Number of steps to forecast, by default 5
231        level : float, optional
232            Confidence level for prediction intervals, by default None
233        method : str, optional
234            Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian'
235        B : int, optional
236            Number of bootstrap samples, by default 100
237
238        Returns
239        -------
240        point_forecast : array-like of shape (h,)
241            Point forecasted values.
242        lower : array-like of shape (h,)
243            Lower bounds of prediction intervals.
244        upper : array-like of shape (h,)
245            Upper bounds of prediction intervals.
246        """
247
248        point_forecast = self._forecast(h)
249
250        if level is None:
251            return point_forecast
252
253        # probabilistic prediction intervals
254        if method == "gaussian":
255            # Use residual std with horizon-dependent scaling
256            residual_std = np.std(self.residuals, axis=0)
257            z = norm.ppf(1 - (1 - level / 100) / 2)
258
259            # Scale uncertainty by sqrt(h) for each horizon
260            horizon_scale = np.sqrt(np.arange(1, h + 1))[:, None]
261            std_expanded = residual_std * horizon_scale
262
263            lower = point_forecast - z * std_expanded
264            upper = point_forecast + z * std_expanded
265
266        elif method == "bootstrap":
267            # Proper residual bootstrap
268            key = jax.random.PRNGKey(self.seed)
269            n_residuals = len(self.residuals)
270            sims = []
271
272            for _ in range(B):
273                key, subkey = jax.random.split(key)
274                boot_indices = np.random.choice(
275                    n_residuals, size=h, replace=True
276                )
277                boot_resids = self.residuals[boot_indices]
278
279                current = self.last_obs.copy()
280                path = []
281
282                for t in range(h):
283                    x_new = current.flatten()[None, :]
284                    y_pred = self._predict_step(x_new)[0]
285                    y_sim = y_pred + boot_resids[t]
286                    path.append(y_sim)
287                    current = jnp.vstack([current[1:], y_sim])
288
289                sims.append(jnp.array(path))
290
291            sims = jnp.array(sims)
292            lower = jnp.percentile(sims, (100 - level) / 2, axis=0)
293            upper = jnp.percentile(sims, 100 - (100 - level) / 2, axis=0)
294
295        return {
296            "mean": np.array(point_forecast),
297            "lower": np.array(lower),
298            "upper": np.array(upper),
299        }

Vectorized Ridge2 RVFL for multivariate time series forecasting.

Parameters

lags : int, optional Number of lags to use for feature engineering, by default 1 nb_hidden : int, optional Number of hidden units, by default 5 activ : str, optional Activation function, by default 'relu' lambda_1 : float, optional Ridge regularization parameter for input features, by default 0.1 lambda_2 : float, optional Ridge regularization parameter for hidden units, by default 0.1 nodes_sim : str, optional Type of quasi-random sequence for weight initialization, by default 'sobol' seed : int, optional Random seed for reproducibility, by default 42

def fit(self, y):
142    def fit(self, y):
143        """Fit the Ridge2 model.
144
145        Parameters
146        ----------
147        y : array-like of shape (n_samples,)
148            Target values.
149        """
150        y = jnp.array(y)
151        if y.ndim == 1:
152            y = y[:, None]
153
154        X, Y = self._create_lags(y)
155        self.n_series = Y.shape[1]
156
157        self.W = self._init_weights(X.shape[1])
158        H = self._compute_hidden(X, self.W)
159
160        (
161            self.beta,
162            self.gamma,
163            self.Y_mean,
164            self.X_mean,
165            self.X_std,
166            self.H_mean,
167            self.H_std,
168        ) = self._solve_ridge2(X, H, Y)
169
170        # Compute residuals for prediction intervals
171        X_s = (X - self.X_mean) / self.X_std
172        H_s = (H - self.H_mean) / self.H_std
173        fitted = X_s @ self.beta + H_s @ self.gamma + self.Y_mean
174        self.residuals = np.array(Y - fitted)
175
176        self.last_obs = y[-self.lags:]
177        return self

Fit the Ridge2 model.

Parameters

y : array-like of shape (n_samples,) Target values.

def predict(self, h=5, level=None, method='gaussian', B=100):
224    def predict(self, h=5, level=None, method="gaussian", B=100):
225        """Generate prediction intervals with proper uncertainty propagation.
226
227        Parameters
228        ----------
229        h : int, optional
230            Number of steps to forecast, by default 5
231        level : float, optional
232            Confidence level for prediction intervals, by default None
233        method : str, optional
234            Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian'
235        B : int, optional
236            Number of bootstrap samples, by default 100
237
238        Returns
239        -------
240        point_forecast : array-like of shape (h,)
241            Point forecasted values.
242        lower : array-like of shape (h,)
243            Lower bounds of prediction intervals.
244        upper : array-like of shape (h,)
245            Upper bounds of prediction intervals.
246        """
247
248        point_forecast = self._forecast(h)
249
250        if level is None:
251            return point_forecast
252
253        # probabilistic prediction intervals
254        if method == "gaussian":
255            # Use residual std with horizon-dependent scaling
256            residual_std = np.std(self.residuals, axis=0)
257            z = norm.ppf(1 - (1 - level / 100) / 2)
258
259            # Scale uncertainty by sqrt(h) for each horizon
260            horizon_scale = np.sqrt(np.arange(1, h + 1))[:, None]
261            std_expanded = residual_std * horizon_scale
262
263            lower = point_forecast - z * std_expanded
264            upper = point_forecast + z * std_expanded
265
266        elif method == "bootstrap":
267            # Proper residual bootstrap
268            key = jax.random.PRNGKey(self.seed)
269            n_residuals = len(self.residuals)
270            sims = []
271
272            for _ in range(B):
273                key, subkey = jax.random.split(key)
274                boot_indices = np.random.choice(
275                    n_residuals, size=h, replace=True
276                )
277                boot_resids = self.residuals[boot_indices]
278
279                current = self.last_obs.copy()
280                path = []
281
282                for t in range(h):
283                    x_new = current.flatten()[None, :]
284                    y_pred = self._predict_step(x_new)[0]
285                    y_sim = y_pred + boot_resids[t]
286                    path.append(y_sim)
287                    current = jnp.vstack([current[1:], y_sim])
288
289                sims.append(jnp.array(path))
290
291            sims = jnp.array(sims)
292            lower = jnp.percentile(sims, (100 - level) / 2, axis=0)
293            upper = jnp.percentile(sims, 100 - (100 - level) / 2, axis=0)
294
295        return {
296            "mean": np.array(point_forecast),
297            "lower": np.array(lower),
298            "upper": np.array(upper),
299        }

Generate prediction intervals with proper uncertainty propagation.

Parameters

h : int, optional Number of steps to forecast, by default 5 level : float, optional Confidence level for prediction intervals, by default None method : str, optional Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian' B : int, optional Number of bootstrap samples, by default 100

Returns

point_forecast : array-like of shape (h, n_series) Point forecasted values, returned on their own when level is None. Otherwise a dict is returned with keys "mean" (point forecasts), "lower" (lower bounds of the prediction intervals) and "upper" (upper bounds of the prediction intervals).

class SubSampler:
 6class SubSampler:
 7    """Subsampling class.
 8
 9    Attributes:
10
11       y: array-like, shape = [n_samples]
12           Target values.
13
14       row_sample: double
15           subsampling fraction
16
17       n_samples: int
18            subsampling by using the number of rows (supersedes row_sample)
19
20       seed: int
21           reproductibility seed
22
23       n_jobs: int
24            number of jobs to run in parallel
25
26       verbose: bool
27            print progress messages and bars
28    """
29
30    def __init__(
31        self,
32        y,
33        row_sample=0.8,
34        n_samples=None,
35        seed=123,
36        n_jobs=None,
37        verbose=False,
38    ):
39        self.y = y
40        self.n_samples = n_samples
41        if self.n_samples is None:
42            assert (
43                row_sample < 1 and row_sample >= 0
44            ), "'row_sample' must be provided, plus < 1 and >= 0"
45            self.row_sample = row_sample
46        else:
47            assert self.n_samples < len(y), "'n_samples' must be < len(y)"
48            self.row_sample = self.n_samples / len(y)
49        self.seed = seed
50        self.indices = None
51        self.n_jobs = n_jobs
52        self.verbose = verbose
53
54    def subsample(self):
55        """Returns indices of subsampled input data.
56
57        Examples:
58
59        <ul>
60            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
61            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
62        </ul>
63
64        """
65        self.indices = dosubsample(
66            y=self.y,
67            row_sample=self.row_sample,
68            seed=self.seed,
69            n_jobs=self.n_jobs,
70            verbose=self.verbose,
71        )
72        return self.indices

Subsampling class.

Attributes:

y: array-like, shape = [n_samples] Target values.

row_sample: double subsampling fraction

n_samples: int subsampling by using the number of rows (supersedes row_sample)

seed: int reproducibility seed

n_jobs: int number of jobs to run in parallel

verbose: bool print progress messages and bars

def subsample(self):
54    def subsample(self):
55        """Returns indices of subsampled input data.
56
57        Examples:
58
59        <ul>
60            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
61            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
62        </ul>
63
64        """
65        self.indices = dosubsample(
66            y=self.y,
67            row_sample=self.row_sample,
68            seed=self.seed,
69            n_jobs=self.n_jobs,
70            verbose=self.verbose,
71        )
72        return self.indices

Returns indices of subsampled input data.

Examples: